Retry a few times with progressive delays before giving up
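
Replaces the previous exponential backoff (2 attempts with a 1s base delay) with a fixed schedule of progressive delays (1s, 3s, 5s, 8s across 4 attempts) in both the Gemini and OpenAI-compatible providers. Errors are retried only when they look transient (timeout, connection/network failures, HTTP 429/5xx); each retry is logged with the attempt count and delay, and once all attempts are exhausted the last exception is re-raised as a RuntimeError. The Gemini provider already had the attempt loop and is_retryable check, so only its delays and logging change; the OpenAI-compatible provider gains the full retry loop, plus the import time it needs.

The core pattern, as a minimal standalone sketch (call_with_retries, RETRY_DELAYS, and RETRYABLE_TERMS are illustrative names, not identifiers from this codebase):

import logging
import time

RETRY_DELAYS = [1, 3, 5, 8]  # progressive delays in seconds
RETRYABLE_TERMS = ["timeout", "connection", "network", "temporary", "unavailable", "retry", "429", "500", "502", "503", "504"]

def call_with_retries(func, *args, **kwargs):
    """Call func, retrying errors that look transient with progressive delays."""
    max_retries = len(RETRY_DELAYS)  # total attempts
    last_exception = None
    for attempt in range(max_retries):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            last_exception = e
            # Retry only if the error message suggests a transient failure
            is_retryable = any(term in str(e).lower() for term in RETRYABLE_TERMS)
            if attempt == max_retries - 1 or not is_retryable:
                break
            delay = RETRY_DELAYS[attempt]
            logging.warning("Attempt %d/%d failed: %s. Retrying in %ds...", attempt + 1, max_retries, e, delay)
            time.sleep(delay)
    raise RuntimeError(f"All {max_retries} attempts failed: {last_exception}") from last_exception

Fixed delays keep the worst-case wait bounded and predictable: at most 1 + 3 + 5 = 9s of sleeping, since the final attempt breaks out rather than sleeping (the last entry of the delay list is never used).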
@@ -159,9 +159,9 @@ class GeminiModelProvider(ModelProvider):
         actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
         generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
 
-        # Retry logic with exponential backoff
-        max_retries = 2  # Total of 2 attempts (1 initial + 1 retry)
-        base_delay = 1.0  # Start with 1 second delay
+        # Retry logic with progressive delays
+        max_retries = 4  # Total of 4 attempts
+        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
 
         last_exception = None
 
@@ -217,11 +217,13 @@ class GeminiModelProvider(ModelProvider):
                 if attempt == max_retries - 1 or not is_retryable:
                     break
 
-                # Calculate delay with exponential backoff
-                delay = base_delay * (2**attempt)
+                # Get progressive delay
+                delay = retry_delays[attempt]
 
-                # Log retry attempt (could add logging here if needed)
-                # For now, just sleep and retry
+                # Log retry attempt
+                logger.warning(
+                    f"Gemini API error for model {resolved_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
+                )
                 time.sleep(delay)
 
         # If we get here, all retries failed
@@ -4,6 +4,7 @@ import base64
 import ipaddress
 import logging
 import os
+import time
 from abc import abstractmethod
 from typing import Optional
 from urllib.parse import urlparse
@@ -300,33 +301,76 @@ class OpenAICompatibleProvider(ModelProvider):
             if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
                 completion_params[key] = value
 
-        try:
-            # Generate completion
-            response = self.client.chat.completions.create(**completion_params)
-
-            # Extract content and usage
-            content = response.choices[0].message.content
-            usage = self._extract_usage(response)
-
-            return ModelResponse(
-                content=content,
-                usage=usage,
-                model_name=model_name,
-                friendly_name=self.FRIENDLY_NAME,
-                provider=self.get_provider_type(),
-                metadata={
-                    "finish_reason": response.choices[0].finish_reason,
-                    "model": response.model,  # Actual model used
-                    "id": response.id,
-                    "created": response.created,
-                },
-            )
-
-        except Exception as e:
-            # Log error and re-raise with more context
-            error_msg = f"{self.FRIENDLY_NAME} API error for model {model_name}: {str(e)}"
-            logging.error(error_msg)
-            raise RuntimeError(error_msg) from e
+        # Retry logic with progressive delays
+        max_retries = 4  # Total of 4 attempts
+        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
+
+        last_exception = None
+
+        for attempt in range(max_retries):
+            try:
+                # Generate completion
+                response = self.client.chat.completions.create(**completion_params)
+
+                # Extract content and usage
+                content = response.choices[0].message.content
+                usage = self._extract_usage(response)
+
+                return ModelResponse(
+                    content=content,
+                    usage=usage,
+                    model_name=model_name,
+                    friendly_name=self.FRIENDLY_NAME,
+                    provider=self.get_provider_type(),
+                    metadata={
+                        "finish_reason": response.choices[0].finish_reason,
+                        "model": response.model,  # Actual model used
+                        "id": response.id,
+                        "created": response.created,
+                    },
+                )
+
+            except Exception as e:
+                last_exception = e
+
+                # Check if this is a retryable error
+                error_str = str(e).lower()
+                is_retryable = any(
+                    term in error_str
+                    for term in [
+                        "timeout",
+                        "connection",
+                        "network",
+                        "temporary",
+                        "unavailable",
+                        "retry",
+                        "429",
+                        "500",
+                        "502",
+                        "503",
+                        "504",
+                    ]
+                )
+
+                # If this is the last attempt or not retryable, give up
+                if attempt == max_retries - 1 or not is_retryable:
+                    break
+
+                # Get progressive delay
+                delay = retry_delays[attempt]
+
+                # Log retry attempt
+                logging.warning(
+                    f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
+                )
+                time.sleep(delay)
+
+        # If we get here, all retries failed
+        error_msg = (
+            f"{self.FRIENDLY_NAME} API error for model {model_name} after {max_retries} attempts: {str(last_exception)}"
+        )
+        logging.error(error_msg)
+        raise RuntimeError(error_msg) from last_exception
 
     def count_tokens(self, text: str, model_name: str) -> int:
         """Count tokens for the given text.