Handle rate limits gracefully, and add the ability to check the Google server on every request in case rate limits were reset on their end, rather than relying on the local cache alone

This commit is contained in:
Badri Narayanan S
2025-12-21 14:49:57 +05:30
parent f625377bdf
commit 95c08f9d55
3 changed files with 61 additions and 12 deletions

View File

@@ -204,6 +204,21 @@ export class AccountManager {
return cleared; return cleared;
} }
/**
* Clear all rate limits to force a fresh check
* (Optimistic retry strategy)
*/
resetAllRateLimits() {
for (const account of this.#accounts) {
account.isRateLimited = false;
// distinct from "clearing" expired limits, we blindly reset here
// we keep the time? User said "clear isRateLimited value, and rateLimitResetTime"
// So we clear both.
account.rateLimitResetTime = null;
}
console.log('[AccountManager] Reset all rate limits for optimistic retry');
}
/** /**
* Pick the next available account (round-robin) * Pick the next available account (round-robin)
*/ */

View File

@@ -111,12 +111,21 @@ function parseResetTime(responseOrError, errorText = '') {
if (!resetMs) { if (!resetMs) {
const msg = (responseOrError instanceof Error ? responseOrError.message : errorText) || ''; const msg = (responseOrError instanceof Error ? responseOrError.message : errorText) || '';
// Try to extract "retry-after-ms" or "retryDelay" in ms // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
const msMatch = msg.match(/retry[-_]?after[-_]?ms[:\s"]+(\d+)/i) || const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d\.]+)(?:s\b|s")/i);
msg.match(/retryDelay[:\s"]+(\d+)/i); if (secMatch) {
if (msMatch) { resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
resetMs = parseInt(msMatch[1], 10); console.log(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
console.log(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`); }
if (!resetMs) {
// Check for ms (explicit "ms" suffix or implicit if no suffix)
// Rejects "s" suffix or floats (handled above)
const msMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+(\d+)(?:\s*ms)?(?![\w.])/i);
if (msMatch) {
resetMs = parseInt(msMatch[1], 10);
console.log(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`);
}
} }
// Try to extract seconds value like "retry after 60 seconds" // Try to extract seconds value like "retry after 60 seconds"
@@ -226,7 +235,11 @@ export async function sendMessage(anthropicRequest, accountManager) {
const isThinkingModel = model.toLowerCase().includes('thinking'); const isThinkingModel = model.toLowerCase().includes('thinking');
// Retry loop with account failover // Retry loop with account failover
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { // Ensure we try at least as many times as there are accounts to cycle through everyone
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
for (let attempt = 0; attempt < maxAttempts; attempt++) {
// Get next available account // Get next available account
let account = accountManager.pickNext(); let account = accountManager.pickNext();
@@ -234,6 +247,14 @@ export async function sendMessage(anthropicRequest, accountManager) {
if (!account) { if (!account) {
if (accountManager.isAllRateLimited()) { if (accountManager.isAllRateLimited()) {
const waitMs = accountManager.getMinWaitTimeMs(); const waitMs = accountManager.getMinWaitTimeMs();
const resetTime = new Date(Date.now() + waitMs).toISOString();
// If wait time is too long (> 2 minutes), throw error immediately
if (waitMs > 120000) {
throw new Error(
`RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}`
);
}
if (accountManager.getAccountCount() === 1) { if (accountManager.getAccountCount() === 1) {
// Single account mode: wait for reset // Single account mode: wait for reset
@@ -243,7 +264,6 @@ export async function sendMessage(anthropicRequest, accountManager) {
account = accountManager.pickNext(); account = accountManager.pickNext();
} else { } else {
// Multi-account: all exhausted - throw proper error // Multi-account: all exhausted - throw proper error
const resetTime = new Date(Date.now() + waitMs).toISOString();
throw new Error( throw new Error(
`RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` + `RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` +
`quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}` `quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}`
@@ -468,7 +488,11 @@ export async function* sendMessageStream(anthropicRequest, accountManager) {
const model = mapModelName(anthropicRequest.model); const model = mapModelName(anthropicRequest.model);
// Retry loop with account failover // Retry loop with account failover
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { // Ensure we try at least as many times as there are accounts to cycle through everyone
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
for (let attempt = 0; attempt < maxAttempts; attempt++) {
// Get next available account // Get next available account
let account = accountManager.pickNext(); let account = accountManager.pickNext();
@@ -476,6 +500,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager) {
if (!account) { if (!account) {
if (accountManager.isAllRateLimited()) { if (accountManager.isAllRateLimited()) {
const waitMs = accountManager.getMinWaitTimeMs(); const waitMs = accountManager.getMinWaitTimeMs();
const resetTime = new Date(Date.now() + waitMs).toISOString();
// If wait time is too long (> 2 minutes), throw error immediately
if (waitMs > 120000) {
throw new Error(
`RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}`
);
}
if (accountManager.getAccountCount() === 1) { if (accountManager.getAccountCount() === 1) {
// Single account mode: wait for reset // Single account mode: wait for reset
@@ -485,7 +517,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager) {
account = accountManager.pickNext(); account = accountManager.pickNext();
} else { } else {
// Multi-account: all exhausted - throw proper error // Multi-account: all exhausted - throw proper error
const resetTime = new Date(Date.now() + waitMs).toISOString();
throw new Error( throw new Error(
`RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` + `RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` +
`quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}` `quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}`

View File

@@ -55,8 +55,8 @@ function parseError(error) {
statusCode = 401; statusCode = 401;
errorMessage = 'Authentication failed. Make sure Antigravity is running with a valid token.'; errorMessage = 'Authentication failed. Make sure Antigravity is running with a valid token.';
} else if (error.message.includes('429') || error.message.includes('RESOURCE_EXHAUSTED') || error.message.includes('QUOTA_EXHAUSTED')) { } else if (error.message.includes('429') || error.message.includes('RESOURCE_EXHAUSTED') || error.message.includes('QUOTA_EXHAUSTED')) {
errorType = 'overloaded_error'; // Claude Code recognizes this type errorType = 'invalid_request_error'; // Use invalid_request_error to force client to purge/stop
statusCode = 529; // Use 529 for overloaded (Claude API convention) statusCode = 400; // Use 400 to ensure client does not retry (429 and 529 trigger retries)
// Try to extract the quota reset time from the error // Try to extract the quota reset time from the error
const resetMatch = error.message.match(/quota will reset after (\d+h\d+m\d+s|\d+m\d+s|\d+s)/i); const resetMatch = error.message.match(/quota will reset after (\d+h\d+m\d+s|\d+m\d+s|\d+s)/i);
@@ -191,6 +191,9 @@ app.post('/v1/messages', async (req, res) => {
// Ensure account manager is initialized // Ensure account manager is initialized
await ensureInitialized(); await ensureInitialized();
// Optimistic Retry: Reset all local rate limits to force a fresh check on Google's side
accountManager.resetAllRateLimits();
const { const {
model, model,
messages, messages,