From 0fa945b0697b1fda04ccbff7013ee5608ac6cdb0 Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Fri, 23 Jan 2026 14:29:24 +0530
Subject: [PATCH] fix: don't count rate limit waits as failed retry attempts

When all accounts are rate-limited or token-exhausted, the retry loop
was incorrectly counting the wait time as a failed attempt. This caused
premature "Max retries exceeded" errors when we were just patiently
waiting for accounts to become available.

- Add attempt-- after sleeping for rate limits or strategy waits
- Add #diagnoseNoCandidates() to hybrid strategy for better logging
- Add getTimeUntilNextToken() and getMinTimeUntilToken() to token tracker
- Return waitMs from hybrid strategy when all accounts are token-blocked

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../strategies/hybrid-strategy.js             | 58 ++++++++++++++++++-
 .../trackers/token-bucket-tracker.js          | 34 +++++++++++
 src/cloudcode/message-handler.js              |  8 ++-
 src/cloudcode/streaming-handler.js            |  8 ++-
 4 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/src/account-manager/strategies/hybrid-strategy.js b/src/account-manager/strategies/hybrid-strategy.js
index acda2a9..49a0b49 100644
--- a/src/account-manager/strategies/hybrid-strategy.js
+++ b/src/account-manager/strategies/hybrid-strategy.js
@@ -68,8 +68,10 @@ export class HybridStrategy extends BaseStrategy {
         const candidates = this.#getCandidates(accounts, modelId);
 
         if (candidates.length === 0) {
-            logger.debug('[HybridStrategy] No candidates available');
-            return { account: null, index: 0, waitMs: 0 };
+            // Diagnose why no candidates are available and compute wait time
+            const { reason, waitMs } = this.#diagnoseNoCandidates(accounts, modelId);
+            logger.warn(`[HybridStrategy] No candidates available: ${reason}`);
+            return { account: null, index: 0, waitMs };
         }
 
         // Score and sort candidates
@@ -232,6 +234,58 @@ export class HybridStrategy extends BaseStrategy {
     getQuotaTracker() {
         return this.#quotaTracker;
     }
+
+    /**
+     * Diagnose why no candidates are available and compute wait time
+     * @private
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - The model ID
+     * @returns {{reason: string, waitMs: number}} Diagnosis plus ms until a token-blocked account refills (0 if none)
+     */
+    #diagnoseNoCandidates(accounts, modelId) {
+        let unusableCount = 0;
+        let unhealthyCount = 0;
+        let noTokensCount = 0;
+        let criticalQuotaCount = 0;
+        const accountsWithoutTokens = [];
+        // Classify each account by the first eligibility check it fails
+        for (const account of accounts) {
+            if (!this.isAccountUsable(account, modelId)) {
+                unusableCount++;
+                continue;
+            }
+            if (!this.#healthTracker.isUsable(account.email)) {
+                unhealthyCount++;
+                continue;
+            }
+            if (!this.#tokenBucketTracker.hasTokens(account.email)) {
+                noTokensCount++;
+                accountsWithoutTokens.push(account.email);
+                continue;
+            }
+            if (this.#quotaTracker.isQuotaCritical(account, modelId)) {
+                criticalQuotaCount++;
+                continue;
+            }
+        }
+
+        // A token-blocked account may become selectable after a refill, so report that wait
+        // even when other accounts are unusable/unhealthy (an empty list yields 0)
+        const waitMs = this.#tokenBucketTracker.getMinTimeUntilToken(accountsWithoutTokens);
+        if (noTokensCount > 0 && unusableCount === 0 && unhealthyCount === 0) {
+            const reason = `all ${noTokensCount} account(s) exhausted token bucket, waiting for refill`;
+            return { reason, waitMs };
+        }
+
+        // Build reason string
+        const parts = [];
+        if (unusableCount > 0) parts.push(`${unusableCount} unusable/disabled`);
+        if (unhealthyCount > 0) parts.push(`${unhealthyCount} unhealthy`);
+        if (noTokensCount > 0) parts.push(`${noTokensCount} no tokens`);
+        if (criticalQuotaCount > 0) parts.push(`${criticalQuotaCount} critical quota`);
+        const reason = parts.length > 0 ? parts.join(', ') : 'unknown';
+        return { reason, waitMs };
+    }
 }
 
 export default HybridStrategy;
diff --git a/src/account-manager/strategies/trackers/token-bucket-tracker.js b/src/account-manager/strategies/trackers/token-bucket-tracker.js
index 33d548c..ea7fcd6 100644
--- a/src/account-manager/strategies/trackers/token-bucket-tracker.js
+++ b/src/account-manager/strategies/trackers/token-bucket-tracker.js
@@ -116,6 +116,40 @@ export class TokenBucketTracker {
     clear() {
         this.#buckets.clear();
     }
+
+    /**
+     * Compute how long an account must wait before its bucket holds a full token
+     * @param {string} email - Account email
+     * @returns {number} Wait in milliseconds (rounded up), 0 when a token is already available
+     */
+    getTimeUntilNextToken(email) {
+        const available = this.getTokens(email);
+
+        // A whole token is already in the bucket, so there is nothing to wait for
+        if (available >= 1) return 0;
+
+        // Convert the token shortfall into refill time at the configured per-minute rate
+        const shortfall = 1 - available;
+        const refillMinutes = shortfall / this.#config.tokensPerMinute;
+        return Math.ceil(refillMinutes * 60 * 1000);
+    }
+
+    /**
+     * Find the shortest wait before at least one of the given accounts has a token
+     * @param {Array<string>} emails - List of account emails
+     * @returns {number} Shortest wait in milliseconds, 0 if the list is empty or a token is ready
+     */
+    getMinTimeUntilToken(emails) {
+        if (emails.length === 0) return 0;
+        let shortest = Infinity;
+        for (const email of emails) {
+            const pending = this.getTimeUntilNextToken(email);
+            // An account with a token ready right now makes further scanning pointless
+            if (pending === 0) return 0;
+            shortest = Math.min(shortest, pending);
+        }
+        return shortest !== Infinity ? shortest : 0;
+    }
 }
 
 export default TokenBucketTracker;
diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js
index 1516085..a283b46 100644
--- a/src/cloudcode/message-handler.js
+++ b/src/cloudcode/message-handler.js
@@ -160,6 +160,10 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                 logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                 await sleep(minWaitMs + 500); // Add 500ms buffer
                 accountManager.clearExpiredLimits();
+
+                // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt
+                // This prevents "Max retries exceeded" when we are just patiently waiting
+                attempt--;
                 continue; // Retry the loop
             }
 
@@ -174,11 +178,13 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
         if (!account && waitMs > 0) {
             logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
             await sleep(waitMs + 500);
+            attempt--; // CRITICAL FIX: Don't count strategy wait as failure
             continue;
         }
 
         if (!account) {
-            continue; // Shouldn't happen, but safety check
+            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
+            continue;
         }
 
         try {
diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js
index b82f5fd..0cf0f3e 100644
--- a/src/cloudcode/streaming-handler.js
+++ b/src/cloudcode/streaming-handler.js
@@ -158,6 +158,10 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                 logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                 await sleep(minWaitMs + 500); // Add 500ms buffer
                 accountManager.clearExpiredLimits();
+
+                // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt
+                // This prevents "Max retries exceeded" when we are just patiently waiting
+                attempt--;
                 continue; // Retry the loop
             }
 
@@ -172,11 +176,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
         if (!account && waitMs > 0) {
             logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
             await sleep(waitMs + 500);
+            attempt--; // CRITICAL FIX: Don't count strategy wait as failure
             continue;
         }
 
         if (!account) {
-            continue; // Shouldn't happen, but safety check
+            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
+            continue;
         }
 
         try {