From 0fa945b0697b1fda04ccbff7013ee5608ac6cdb0 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Fri, 23 Jan 2026 14:29:24 +0530 Subject: [PATCH] fix: don't count rate limit waits as failed retry attempts When all accounts are rate-limited or token-exhausted, the retry loop was incorrectly counting the wait time as a failed attempt. This caused premature "Max retries exceeded" errors when we were just patiently waiting for accounts to become available. - Add attempt-- after sleeping for rate limits or strategy waits - Add #diagnoseNoCandidates() to hybrid strategy for better logging - Add getTimeUntilNextToken() and getMinTimeUntilToken() to token tracker - Return waitMs from hybrid strategy when all accounts are token-blocked Co-Authored-By: Claude --- .../strategies/hybrid-strategy.js | 58 ++++++++++++++++++- .../trackers/token-bucket-tracker.js | 34 +++++++++++ src/cloudcode/message-handler.js | 8 ++- src/cloudcode/streaming-handler.js | 8 ++- 4 files changed, 104 insertions(+), 4 deletions(-) diff --git a/src/account-manager/strategies/hybrid-strategy.js b/src/account-manager/strategies/hybrid-strategy.js index acda2a9..49a0b49 100644 --- a/src/account-manager/strategies/hybrid-strategy.js +++ b/src/account-manager/strategies/hybrid-strategy.js @@ -68,8 +68,10 @@ export class HybridStrategy extends BaseStrategy { const candidates = this.#getCandidates(accounts, modelId); if (candidates.length === 0) { - logger.debug('[HybridStrategy] No candidates available'); - return { account: null, index: 0, waitMs: 0 }; + // Diagnose why no candidates are available and compute wait time + const { reason, waitMs } = this.#diagnoseNoCandidates(accounts, modelId); + logger.warn(`[HybridStrategy] No candidates available: ${reason}`); + return { account: null, index: 0, waitMs }; } // Score and sort candidates @@ -232,6 +234,58 @@ export class HybridStrategy extends BaseStrategy { getQuotaTracker() { return this.#quotaTracker; } + + /** + * Diagnose why no candidates are available and compute wait time + * @private + * @param {Array} accounts - Array of account objects + * @param {string} modelId - The model ID + * @returns {{reason: string, waitMs: number}} Diagnosis result + */ + #diagnoseNoCandidates(accounts, modelId) { + let unusableCount = 0; + let unhealthyCount = 0; + let noTokensCount = 0; + let criticalQuotaCount = 0; + const accountsWithoutTokens = []; + + for (const account of accounts) { + if (!this.isAccountUsable(account, modelId)) { + unusableCount++; + continue; + } + if (!this.#healthTracker.isUsable(account.email)) { + unhealthyCount++; + continue; + } + if (!this.#tokenBucketTracker.hasTokens(account.email)) { + noTokensCount++; + accountsWithoutTokens.push(account.email); + continue; + } + if (this.#quotaTracker.isQuotaCritical(account, modelId)) { + criticalQuotaCount++; + continue; + } + } + + // If all accounts are blocked by token bucket, calculate wait time + if (noTokensCount > 0 && unusableCount === 0 && unhealthyCount === 0) { + const waitMs = this.#tokenBucketTracker.getMinTimeUntilToken(accountsWithoutTokens); + const reason = `all ${noTokensCount} account(s) exhausted token bucket, waiting for refill`; + return { reason, waitMs }; + } + + // Build reason string + const parts = []; + if (unusableCount > 0) parts.push(`${unusableCount} unusable/disabled`); + if (unhealthyCount > 0) parts.push(`${unhealthyCount} unhealthy`); + if (noTokensCount > 0) parts.push(`${noTokensCount} no tokens`); + if (criticalQuotaCount > 0) parts.push(`${criticalQuotaCount} critical quota`); + + const reason = parts.length > 0 ? parts.join(', ') : 'unknown'; + return { reason, waitMs: 0 }; + } } export default HybridStrategy; diff --git a/src/account-manager/strategies/trackers/token-bucket-tracker.js b/src/account-manager/strategies/trackers/token-bucket-tracker.js index 33d548c..ea7fcd6 100644 --- a/src/account-manager/strategies/trackers/token-bucket-tracker.js +++ b/src/account-manager/strategies/trackers/token-bucket-tracker.js @@ -116,6 +116,40 @@ export class TokenBucketTracker { clear() { this.#buckets.clear(); } + + /** + * Get time in milliseconds until next token is available for an account + * @param {string} email - Account email + * @returns {number} Milliseconds until next token, 0 if tokens available now + */ + getTimeUntilNextToken(email) { + const currentTokens = this.getTokens(email); + if (currentTokens >= 1) { + return 0; + } + + // Calculate time to regenerate 1 token + const tokensNeeded = 1 - currentTokens; + const minutesNeeded = tokensNeeded / this.#config.tokensPerMinute; + return Math.ceil(minutesNeeded * 60 * 1000); + } + + /** + * Get the minimum time until any account in the list has a token + * @param {Array} emails - List of account emails + * @returns {number} Minimum milliseconds until any account has a token + */ + getMinTimeUntilToken(emails) { + if (emails.length === 0) return 0; + + let minWait = Infinity; + for (const email of emails) { + const wait = this.getTimeUntilNextToken(email); + if (wait === 0) return 0; + minWait = Math.min(minWait, wait); + } + return minWait === Infinity ? 0 : minWait; + } } export default TokenBucketTracker; diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js index 1516085..a283b46 100644 --- a/src/cloudcode/message-handler.js +++ b/src/cloudcode/message-handler.js @@ -160,6 +160,10 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`); await sleep(minWaitMs + 500); // Add 500ms buffer accountManager.clearExpiredLimits(); + + // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt + // This prevents "Max retries exceeded" when we are just patiently waiting + attempt--; continue; // Retry the loop } @@ -174,11 +178,13 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab if (!account && waitMs > 0) { logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`); await sleep(waitMs + 500); + attempt--; // CRITICAL FIX: Don't count strategy wait as failure continue; } if (!account) { - continue; // Shouldn't happen, but safety check + logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`); + continue; } try { diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js index b82f5fd..0cf0f3e 100644 --- a/src/cloudcode/streaming-handler.js +++ b/src/cloudcode/streaming-handler.js @@ -158,6 +158,10 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`); await sleep(minWaitMs + 500); // Add 500ms buffer accountManager.clearExpiredLimits(); + + // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt + // This prevents "Max retries exceeded" when we are just patiently waiting + attempt--; continue; // Retry the loop } @@ -172,11 +176,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb if (!account && waitMs > 0) { logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`); await sleep(waitMs + 500); + attempt--; // CRITICAL FIX: Don't count strategy wait as failure continue; } if (!account) { - continue; // Shouldn't happen, but safety check + logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`); + continue; } try {