From 95c08f9d555c901cf1e121ab959aafbb53721ca9 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sun, 21 Dec 2025 14:49:57 +0530 Subject: [PATCH] Handle rate limits gracefully, and add the ability to re-check Google's server on every request in case rate limits were reset on their end, instead of relying on the local cache alone --- src/account-manager.js | 15 ++++++++++++ src/cloudcode-client.js | 51 +++++++++++++++++++++++++++++++++-------- src/server.js | 7 ++++-- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/account-manager.js b/src/account-manager.js index 58c0640..1a706dd 100644 --- a/src/account-manager.js +++ b/src/account-manager.js @@ -204,6 +204,21 @@ export class AccountManager { return cleared; } + /** + * Clear all rate limits to force a fresh check + * (Optimistic retry strategy) + */ + resetAllRateLimits() { + for (const account of this.#accounts) { + account.isRateLimited = false; + // Unlike clearing only expired limits, this resets every account unconditionally. + // Both isRateLimited and rateLimitResetTime are cleared (not just the flag), + // so the next request performs a fresh check instead of trusting local state. + account.rateLimitResetTime = null; + } + console.log('[AccountManager] Reset all rate limits for optimistic retry'); + } + /** * Pick the next available account (round-robin) */ diff --git a/src/cloudcode-client.js b/src/cloudcode-client.js index f48173b..e19bac6 100644 --- a/src/cloudcode-client.js +++ b/src/cloudcode-client.js @@ -111,12 +111,21 @@ function parseResetTime(responseOrError, errorText = '') { if (!resetMs) { const msg = (responseOrError instanceof Error ? 
responseOrError.message : errorText) || ''; - // Try to extract "retry-after-ms" or "retryDelay" in ms - const msMatch = msg.match(/retry[-_]?after[-_]?ms[:\s"]+(\d+)/i) || - msg.match(/retryDelay[:\s"]+(\d+)/i); - if (msMatch) { - resetMs = parseInt(msMatch[1], 10); - console.log(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`); + // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s") + const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d\.]+)(?:s\b|s")/i); + if (secMatch) { + resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000); + console.log(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`); + } + + if (!resetMs) { + // Check for ms (explicit "ms" suffix or implicit if no suffix) + // Rejects "s" suffix or floats (handled above) + const msMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+(\d+)(?:\s*ms)?(?![\w.])/i); + if (msMatch) { + resetMs = parseInt(msMatch[1], 10); + console.log(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`); + } } // Try to extract seconds value like "retry after 60 seconds" @@ -226,7 +235,11 @@ export async function sendMessage(anthropicRequest, accountManager) { const isThinkingModel = model.toLowerCase().includes('thinking'); // Retry loop with account failover - for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + // Ensure we try at least as many times as there are accounts to cycle through everyone + // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop + const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { // Get next available account let account = accountManager.pickNext(); @@ -234,6 +247,14 @@ export async function sendMessage(anthropicRequest, accountManager) { if (!account) { if (accountManager.isAllRateLimited()) { const waitMs = accountManager.getMinWaitTimeMs(); + const 
resetTime = new Date(Date.now() + waitMs).toISOString(); + + // If wait time is too long (> 2 minutes), throw error immediately + if (waitMs > 120000) { + throw new Error( + `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}` + ); + } if (accountManager.getAccountCount() === 1) { // Single account mode: wait for reset @@ -243,7 +264,6 @@ export async function sendMessage(anthropicRequest, accountManager) { account = accountManager.pickNext(); } else { // Multi-account: all exhausted - throw proper error - const resetTime = new Date(Date.now() + waitMs).toISOString(); throw new Error( `RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` + `quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}` @@ -468,7 +488,11 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { const model = mapModelName(anthropicRequest.model); // Retry loop with account failover - for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + // Ensure we try at least as many times as there are accounts to cycle through everyone + // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop + const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { // Get next available account let account = accountManager.pickNext(); @@ -476,6 +500,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { if (!account) { if (accountManager.isAllRateLimited()) { const waitMs = accountManager.getMinWaitTimeMs(); + const resetTime = new Date(Date.now() + waitMs).toISOString(); + + // If wait time is too long (> 2 minutes), throw error immediately + if (waitMs > 120000) { + throw new Error( + `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(waitMs)}. 
Next available: ${resetTime}` + ); + } if (accountManager.getAccountCount() === 1) { // Single account mode: wait for reset @@ -485,7 +517,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { account = accountManager.pickNext(); } else { // Multi-account: all exhausted - throw proper error - const resetTime = new Date(Date.now() + waitMs).toISOString(); throw new Error( `RESOURCE_EXHAUSTED: All ${accountManager.getAccountCount()} accounts rate-limited. ` + `quota will reset after ${formatDuration(waitMs)}. Next available: ${resetTime}` diff --git a/src/server.js b/src/server.js index 4c25b80..e75596c 100644 --- a/src/server.js +++ b/src/server.js @@ -55,8 +55,8 @@ function parseError(error) { statusCode = 401; errorMessage = 'Authentication failed. Make sure Antigravity is running with a valid token.'; } else if (error.message.includes('429') || error.message.includes('RESOURCE_EXHAUSTED') || error.message.includes('QUOTA_EXHAUSTED')) { - errorType = 'overloaded_error'; // Claude Code recognizes this type - statusCode = 529; // Use 529 for overloaded (Claude API convention) + errorType = 'invalid_request_error'; // Use invalid_request_error to force client to purge/stop + statusCode = 400; // Use 400 to ensure client does not retry (429 and 529 trigger retries) // Try to extract the quota reset time from the error const resetMatch = error.message.match(/quota will reset after (\d+h\d+m\d+s|\d+m\d+s|\d+s)/i); @@ -191,6 +191,9 @@ app.post('/v1/messages', async (req, res) => { // Ensure account manager is initialized await ensureInitialized(); + // Optimistic Retry: Reset all local rate limits to force a fresh check on Google's side + accountManager.resetAllRateLimits(); + const { model, messages,