From ebfda1862e0f3baad261945d8545e138a9776a29 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Mon, 16 Jun 2025 17:47:42 +0400
Subject: [PATCH] Retry a few times with progressive delays before giving up

---
 providers/gemini.py            | 16 +++---
 providers/openai_compatible.py | 92 +++++++++++++++++++++++++---------
 2 files changed, 77 insertions(+), 31 deletions(-)

diff --git a/providers/gemini.py b/providers/gemini.py
index 0972e89..8922dc6 100644
--- a/providers/gemini.py
+++ b/providers/gemini.py
@@ -159,9 +159,9 @@ class GeminiModelProvider(ModelProvider):
             actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
             generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
 
-        # Retry logic with exponential backoff
-        max_retries = 2  # Total of 2 attempts (1 initial + 1 retry)
-        base_delay = 1.0  # Start with 1 second delay
+        # Retry logic with progressive delays
+        max_retries = 4  # Total of 4 attempts
+        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
 
         last_exception = None
 
@@ -217,11 +217,13 @@
                 if attempt == max_retries - 1 or not is_retryable:
                     break
 
-                # Calculate delay with exponential backoff
-                delay = base_delay * (2**attempt)
+                # Get progressive delay
+                delay = retry_delays[attempt]
 
-                # Log retry attempt (could add logging here if needed)
-                # For now, just sleep and retry
+                # Log retry attempt
+                logger.warning(
+                    f"Gemini API error for model {resolved_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
+                )
                 time.sleep(delay)
 
         # If we get here, all retries failed
diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py
index 9e0b02b..2db8f92 100644
--- a/providers/openai_compatible.py
+++ b/providers/openai_compatible.py
@@ -4,6 +4,7 @@ import base64
 import ipaddress
 import logging
 import os
+import time
 from abc import abstractmethod
 from typing import Optional
 from urllib.parse import urlparse
@@ -300,33 +301,76 @@ class OpenAICompatibleProvider(ModelProvider):
             if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
                 completion_params[key] = value
 
-        try:
-            # Generate completion
-            response = self.client.chat.completions.create(**completion_params)
+        # Retry logic with progressive delays
+        max_retries = 4  # Total of 4 attempts
+        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
 
-            # Extract content and usage
-            content = response.choices[0].message.content
-            usage = self._extract_usage(response)
+        last_exception = None
 
-            return ModelResponse(
-                content=content,
-                usage=usage,
-                model_name=model_name,
-                friendly_name=self.FRIENDLY_NAME,
-                provider=self.get_provider_type(),
-                metadata={
-                    "finish_reason": response.choices[0].finish_reason,
-                    "model": response.model,  # Actual model used
-                    "id": response.id,
-                    "created": response.created,
-                },
-            )
+        for attempt in range(max_retries):
+            try:
+                # Generate completion
+                response = self.client.chat.completions.create(**completion_params)
 
-        except Exception as e:
-            # Log error and re-raise with more context
-            error_msg = f"{self.FRIENDLY_NAME} API error for model {model_name}: {str(e)}"
-            logging.error(error_msg)
-            raise RuntimeError(error_msg) from e
+                # Extract content and usage
+                content = response.choices[0].message.content
+                usage = self._extract_usage(response)
+
+                return ModelResponse(
+                    content=content,
+                    usage=usage,
+                    model_name=model_name,
+                    friendly_name=self.FRIENDLY_NAME,
+                    provider=self.get_provider_type(),
+                    metadata={
+                        "finish_reason": response.choices[0].finish_reason,
+                        "model": response.model,  # Actual model used
+                        "id": response.id,
+                        "created": response.created,
+                    },
+                )
+
+            except Exception as e:
+                last_exception = e
+
+                # Check if this is a retryable error
+                error_str = str(e).lower()
+                is_retryable = any(
+                    term in error_str
+                    for term in [
+                        "timeout",
+                        "connection",
+                        "network",
+                        "temporary",
+                        "unavailable",
+                        "retry",
+                        "429",
+                        "500",
+                        "502",
+                        "503",
+                        "504",
+                    ]
+                )
+
+                # If this is the last attempt or not retryable, give up
+                if attempt == max_retries - 1 or not is_retryable:
+                    break
+
+                # Get progressive delay
+                delay = retry_delays[attempt]
+
+                # Log retry attempt
+                logging.warning(
+                    f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
+                )
+                time.sleep(delay)
+
+        # If we get here, all retries failed
+        error_msg = (
+            f"{self.FRIENDLY_NAME} API error for model {model_name} after {max_retries} attempts: {str(last_exception)}"
+        )
+        logging.error(error_msg)
+        raise RuntimeError(error_msg) from last_exception
 
     def count_tokens(self, text: str, model_name: str) -> int:
         """Count tokens for the given text.