fix: try model fallback before throwing RESOURCE_EXHAUSTED error

When all accounts are rate-limited for more than 2 minutes, sendMessage
and sendMessageStream now attempt model fallback (if enabled) before
throwing the RESOURCE_EXHAUSTED error. This lets requests that hit
quota-exhausted accounts fall back gracefully to an alternate model.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-15 16:41:02 +05:30
parent 77363c679e
commit 2a0c110f9b
2 changed files with 23 additions and 21 deletions

View File

@@ -55,8 +55,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
const minWaitMs = accountManager.getMinWaitTimeMs(model); const minWaitMs = accountManager.getMinWaitTimeMs(model);
const resetTime = new Date(Date.now() + minWaitMs).toISOString(); const resetTime = new Date(Date.now() + minWaitMs).toISOString();
// If wait time is too long (> 2 minutes), throw error immediately // If wait time is too long (> 2 minutes), try fallback first, then throw error
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
// Check if fallback is enabled and available
if (fallbackEnabled) {
const fallbackModel = getFallbackModel(model);
if (fallbackModel) {
logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel}`);
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
return await sendMessage(fallbackRequest, accountManager, false);
}
}
throw new Error( throw new Error(
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}` `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
); );
@@ -70,15 +79,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
continue; // Retry the loop continue; // Retry the loop
} }
// Check if fallback is enabled and available // No accounts available and not rate-limited (shouldn't happen normally)
if (fallbackEnabled) {
const fallbackModel = getFallbackModel(model);
if (fallbackModel) {
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
return await sendMessage(fallbackRequest, accountManager, false);
}
}
throw new Error('No accounts available'); throw new Error('No accounts available');
} }

View File

@@ -54,8 +54,18 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
const minWaitMs = accountManager.getMinWaitTimeMs(model); const minWaitMs = accountManager.getMinWaitTimeMs(model);
const resetTime = new Date(Date.now() + minWaitMs).toISOString(); const resetTime = new Date(Date.now() + minWaitMs).toISOString();
// If wait time is too long (> 2 minutes), throw error immediately // If wait time is too long (> 2 minutes), try fallback first, then throw error
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
// Check if fallback is enabled and available
if (fallbackEnabled) {
const fallbackModel = getFallbackModel(model);
if (fallbackModel) {
logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel} (streaming)`);
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
yield* sendMessageStream(fallbackRequest, accountManager, false);
return;
}
}
throw new Error( throw new Error(
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}` `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
); );
@@ -69,16 +79,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
continue; // Retry the loop continue; // Retry the loop
} }
// Check if fallback is enabled and available // No accounts available and not rate-limited (shouldn't happen normally)
if (fallbackEnabled) {
const fallbackModel = getFallbackModel(model);
if (fallbackModel) {
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
yield* sendMessageStream(fallbackRequest, accountManager, false);
return;
}
}
throw new Error('No accounts available'); throw new Error('No accounts available');
} }