fix: try model fallback before throwing RESOURCE_EXHAUSTED error
When all accounts are rate-limited for more than 2 minutes, sendMessage and sendMessageStream now attempt model fallback (if enabled) before throwing the RESOURCE_EXHAUSTED error. This lets a request whose accounts have all exhausted their quota fall back gracefully to an alternate model.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -55,8 +55,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
||||||
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
||||||
|
|
||||||
// If wait time is too long (> 2 minutes), throw error immediately
|
// If wait time is too long (> 2 minutes), try fallback first, then throw error
|
||||||
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
||||||
|
// Check if fallback is enabled and available
|
||||||
|
if (fallbackEnabled) {
|
||||||
|
const fallbackModel = getFallbackModel(model);
|
||||||
|
if (fallbackModel) {
|
||||||
|
logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel}`);
|
||||||
|
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
||||||
|
return await sendMessage(fallbackRequest, accountManager, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
||||||
);
|
);
|
||||||
@@ -70,15 +79,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
continue; // Retry the loop
|
continue; // Retry the loop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if fallback is enabled and available
|
// No accounts available and not rate-limited (shouldn't happen normally)
|
||||||
if (fallbackEnabled) {
|
|
||||||
const fallbackModel = getFallbackModel(model);
|
|
||||||
if (fallbackModel) {
|
|
||||||
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
|
|
||||||
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
||||||
return await sendMessage(fallbackRequest, accountManager, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw new Error('No accounts available');
|
throw new Error('No accounts available');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,8 +54,18 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
||||||
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
||||||
|
|
||||||
// If wait time is too long (> 2 minutes), throw error immediately
|
// If wait time is too long (> 2 minutes), try fallback first, then throw error
|
||||||
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
||||||
|
// Check if fallback is enabled and available
|
||||||
|
if (fallbackEnabled) {
|
||||||
|
const fallbackModel = getFallbackModel(model);
|
||||||
|
if (fallbackModel) {
|
||||||
|
logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel} (streaming)`);
|
||||||
|
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
||||||
|
yield* sendMessageStream(fallbackRequest, accountManager, false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
||||||
);
|
);
|
||||||
@@ -69,16 +79,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
continue; // Retry the loop
|
continue; // Retry the loop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if fallback is enabled and available
|
// No accounts available and not rate-limited (shouldn't happen normally)
|
||||||
if (fallbackEnabled) {
|
|
||||||
const fallbackModel = getFallbackModel(model);
|
|
||||||
if (fallbackModel) {
|
|
||||||
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
|
|
||||||
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
||||||
yield* sendMessageStream(fallbackRequest, accountManager, false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw new Error('No accounts available');
|
throw new Error('No accounts available');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user