Retry a few times with progressive delays before giving up

This commit is contained in:
Fahad
2025-06-16 17:47:42 +04:00
parent 6b09f1468f
commit ebfda1862e
2 changed files with 77 additions and 31 deletions

View File

@@ -159,9 +159,9 @@ class GeminiModelProvider(ModelProvider):
        actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
        generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)

        # Retry logic with progressive delays
        max_retries = 4  # Total of 4 attempts
        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
        last_exception = None
@@ -217,11 +217,13 @@ class GeminiModelProvider(ModelProvider):
                if attempt == max_retries - 1 or not is_retryable:
                    break

                # Get progressive delay
                delay = retry_delays[attempt]

                # Log retry attempt
                logger.warning(
                    f"Gemini API error for model {resolved_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
                )

                time.sleep(delay)

        # If we get here, all retries failed

View File

@@ -4,6 +4,7 @@
import base64
import ipaddress
import logging
import os
import time
from abc import abstractmethod
from typing import Optional
from urllib.parse import urlparse
@@ -300,6 +301,13 @@ class OpenAICompatibleProvider(ModelProvider):
                if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
                    completion_params[key] = value

        # Retry logic with progressive delays
        max_retries = 4  # Total of 4 attempts
        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
        last_exception = None

        for attempt in range(max_retries):
            try:
                # Generate completion
                response = self.client.chat.completions.create(**completion_params)
@@ -323,10 +331,46 @@ class OpenAICompatibleProvider(ModelProvider):
                )
            except Exception as e:
                last_exception = e

                # Check if this is a retryable error
                error_str = str(e).lower()
                is_retryable = any(
                    term in error_str
                    for term in [
                        "timeout",
                        "connection",
                        "network",
                        "temporary",
                        "unavailable",
                        "retry",
                        "429",
                        "500",
                        "502",
                        "503",
                        "504",
                    ]
                )

                # If this is the last attempt or not retryable, give up
                if attempt == max_retries - 1 or not is_retryable:
                    break

                # Get progressive delay
                delay = retry_delays[attempt]

                # Log retry attempt
                logging.warning(
                    f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
                )

                time.sleep(delay)

        # If we get here, all retries failed
        error_msg = (
            f"{self.FRIENDLY_NAME} API error for model {model_name} after {max_retries} attempts: {str(last_exception)}"
        )
        logging.error(error_msg)
        raise RuntimeError(error_msg) from last_exception

    def count_tokens(self, text: str, model_name: str) -> int:
        """Count tokens for the given text.