feat: add configurable account selection strategies

Refactor account selection into a strategy pattern with three options:

- Sticky: cache-optimized, stays on same account until rate-limited
- Round-robin: load-balanced, rotates every request
- Hybrid (default): smart distribution using health scores, token buckets, and LRU

The hybrid strategy uses multiple signals for optimal account selection: health tracking for reliability, client-side token buckets for rate limiting, and LRU freshness to prefer rested accounts.

Includes WebUI settings for strategy selection and unit tests.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-18 03:48:43 +05:30
parent 973234372b
commit 5ae19a5b72
31 changed files with 2721 additions and 353 deletions
--- a/src/account-manager/index.js
+++ b/src/account-manager/index.js
@@ -1,6 +1,6 @@
 /**
 * Account Manager
- * Manages multiple Antigravity accounts with sticky selection,
+ * Manages multiple Antigravity accounts with configurable selection strategies,
 * automatic failover, and smart cooldown for rate-limited accounts.
 */

@@ -23,13 +23,9 @@ import {
    clearProjectCache as clearProject,
    clearTokenCache as clearToken
 } from './credentials.js';
-import {
-    pickNext as selectNext,
-    getCurrentStickyAccount as getSticky,
-    shouldWaitForCurrentAccount as shouldWait,
-    pickStickyAccount as selectSticky
-} from './selection.js';
+import { createStrategy, getStrategyLabel, DEFAULT_STRATEGY } from './strategies/index.js';
 import { logger } from '../utils/logger.js';
+import { config } from '../config.js';

 export class AccountManager {
    #accounts = [];
@@ -37,19 +33,26 @@ export class AccountManager {
    #configPath;
    #settings = {};
    #initialized = false;
+    #strategy = null;
+    #strategyName = DEFAULT_STRATEGY;

    // Per-account caches
    #tokenCache = new Map(); // email -> { token, extractedAt }
    #projectCache = new Map(); // email -> projectId

-    constructor(configPath = ACCOUNT_CONFIG_PATH) {
+    constructor(configPath = ACCOUNT_CONFIG_PATH, strategyName = null) {
        this.#configPath = configPath;
+        // Optional: when omitted the default strategy name stays in effect; initialize() may still replace it
+        if (strategyName) {
+            this.#strategyName = strategyName;
+        }
    }

    /**
     * Initialize the account manager by loading config
+     * @param {string} [strategyOverride] - Override strategy name (from CLI flag or env var)
     */
-    async initialize() {
+    async initialize(strategyOverride = null) {
        if (this.#initialized) return;

        const { accounts, settings, activeIndex } = await loadAccounts(this.#configPath);
@@ -66,6 +69,16 @@ export class AccountManager {
            this.#tokenCache = tokenCache;
        }

+        // Determine strategy: CLI override > env var > config file > default
+        const configStrategy = config?.accountSelection?.strategy;
+        const envStrategy = process.env.ACCOUNT_STRATEGY;
+        this.#strategyName = strategyOverride || envStrategy || configStrategy || this.#strategyName;
+
+        // Create the strategy instance
+        const strategyConfig = config?.accountSelection || {};
+        this.#strategy = createStrategy(this.#strategyName, strategyConfig);
+        logger.info(`[AccountManager] Using ${getStrategyLabel(this.#strategyName)} selection strategy`);
+
        // Clear any expired rate limits
        this.clearExpiredLimits();

@@ -138,51 +151,88 @@ export class AccountManager {
    }

    /**
-     * Pick the next available account (fallback when current is unavailable).
-     * Sets activeIndex to the selected account's index.
-     * @param {string} [modelId] - Optional model ID
-     * @returns {Object|null} The next available account or null if none available
-     */
-    pickNext(modelId = null) {
-        const { account, newIndex } = selectNext(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
-        this.#currentIndex = newIndex;
-        return account;
-    }
-
-    /**
-     * Get the current account without advancing the index (sticky selection).
-     * Used for cache continuity - sticks to the same account until rate-limited.
-     * @param {string} [modelId] - Optional model ID
-     * @returns {Object|null} The current account or null if unavailable/rate-limited
-     */
-    getCurrentStickyAccount(modelId = null) {
-        const { account, newIndex } = getSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
-        this.#currentIndex = newIndex;
-        return account;
-    }
-
-    /**
-     * Check if we should wait for the current account's rate limit to reset.
-     * Used for sticky account selection - wait if rate limit is short (≤ threshold).
-     * @param {string} [modelId] - Optional model ID
-     * @returns {{shouldWait: boolean, waitMs: number, account: Object|null}}
-     */
-    shouldWaitForCurrentAccount(modelId = null) {
-        return shouldWait(this.#accounts, this.#currentIndex, modelId);
-    }
-
-    /**
-     * Pick an account with sticky selection preference.
-     * Prefers the current account for cache continuity, only switches when:
-     * - Current account is rate-limited for > 2 minutes
-     * - Current account is invalid
-     * @param {string} [modelId] - Optional model ID
+     * Select an account using the configured strategy.
+     * This is the main method to use for account selection.
+     * @param {string} [modelId] - Model ID for the request
+     * @param {Object} [options] - Additional options
+     * @param {string} [options.sessionId] - Session ID for cache continuity
     * @returns {{account: Object|null, waitMs: number}} Account to use and optional wait time
     */
-    pickStickyAccount(modelId = null) {
-        const { account, waitMs, newIndex } = selectSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
-        this.#currentIndex = newIndex;
-        return { account, waitMs };
+    selectAccount(modelId = null, options = {}) {
+        if (!this.#strategy) {
+            throw new Error('AccountManager not initialized. Call initialize() first.');
+        }
+
+        const result = this.#strategy.selectAccount(this.#accounts, modelId, {
+            currentIndex: this.#currentIndex,
+            onSave: () => this.saveToDisk(),
+            ...options
+        });
+
+        this.#currentIndex = result.index;
+        return { account: result.account, waitMs: result.waitMs || 0 };
+    }
+
+    /**
+     * Notify the strategy of a successful request
+     * @param {Object} account - The account that was used
+     * @param {string} modelId - The model ID that was used
+     */
+    notifySuccess(account, modelId) {
+        if (this.#strategy) {
+            this.#strategy.onSuccess(account, modelId);
+        }
+    }
+
+    /**
+     * Notify the strategy of a rate limit
+     * @param {Object} account - The account that was rate-limited
+     * @param {string} modelId - The model ID that was rate-limited
+     */
+    notifyRateLimit(account, modelId) {
+        if (this.#strategy) {
+            this.#strategy.onRateLimit(account, modelId);
+        }
+    }
+
+    /**
+     * Notify the strategy of a failure
+     * @param {Object} account - The account that failed
+     * @param {string} modelId - The model ID that failed
+     */
+    notifyFailure(account, modelId) {
+        if (this.#strategy) {
+            this.#strategy.onFailure(account, modelId);
+        }
+    }
+
+    /**
+     * Get the strategy name exactly as stored (it is not normalized or validated here)
+     * @returns {string} Strategy name; the default name until a different one is supplied
+     */
+    getStrategyName() {
+        return this.#strategyName;
+    }
+
+    /**
+     * Get the human-readable label for the stored strategy name
+     * @returns {string} Display label, resolved by the strategies module's getStrategyLabel helper
+     */
+    getStrategyLabel() {
+        return getStrategyLabel(this.#strategyName);
+    }
+
+    /**
+     * Get the health tracker from the current strategy (if available)
+     * Used by handlers for consecutive failure tracking
+     * Only available when using hybrid strategy
+     * @returns {Object|null} Health tracker instance or null if not available
+     */
+    getHealthTracker() {
+        if (this.#strategy && typeof this.#strategy.getHealthTracker === 'function') {
+            return this.#strategy.getHealthTracker();
+        }
+        return null;
    }

    /**
--- a/src/account-manager/selection.js
+++ b/src/account-manager/selection.js
@@ -1,201 +0,0 @@
-/**
- * Account Selection
- *
- * Handles account picking logic (round-robin, sticky) for cache continuity.
- * All rate limit checks are model-specific.
- */
-
-import { MAX_WAIT_BEFORE_ERROR_MS } from '../constants.js';
-import { formatDuration } from '../utils/helpers.js';
-import { logger } from '../utils/logger.js';
-import { clearExpiredLimits, getAvailableAccounts } from './rate-limits.js';
-
-/**
- * Check if an account is usable for a specific model
- * @param {Object} account - Account object
- * @param {string} modelId - Model ID to check
- * @returns {boolean} True if account is usable
- */
-function isAccountUsable(account, modelId) {
-    if (!account || account.isInvalid) return false;
-
-    // WebUI: Skip disabled accounts
-    if (account.enabled === false) return false;
-
-    if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
-        const limit = account.modelRateLimits[modelId];
-        if (limit.isRateLimited && limit.resetTime > Date.now()) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-/**
- * Pick the next available account (fallback when current is unavailable).
- *
- * @param {Array} accounts - Array of account objects
- * @param {number} currentIndex - Current account index
- * @param {Function} onSave - Callback to save changes
- * @param {string} [modelId] - Model ID to check rate limits for
- * @returns {{account: Object|null, newIndex: number}} The next available account and new index
- */
-export function pickNext(accounts, currentIndex, onSave, modelId = null) {
-    clearExpiredLimits(accounts);
-
-    const available = getAvailableAccounts(accounts, modelId);
-    if (available.length === 0) {
-        return { account: null, newIndex: currentIndex };
-    }
-
-    // Clamp index to valid range
-    let index = currentIndex;
-    if (index >= accounts.length) {
-        index = 0;
-    }
-
-    // Find next available account starting from index AFTER current
-    for (let i = 1; i <= accounts.length; i++) {
-        const idx = (index + i) % accounts.length;
-        const account = accounts[idx];
-
-        if (isAccountUsable(account, modelId)) {
-            account.lastUsed = Date.now();
-
-            const position = idx + 1;
-            const total = accounts.length;
-            logger.info(`[AccountManager] Using account: ${account.email} (${position}/${total})`);
-
-            // Trigger save (don't await to avoid blocking)
-            if (onSave) onSave();
-
-            return { account, newIndex: idx };
-        }
-    }
-
-    return { account: null, newIndex: currentIndex };
-}
-
-/**
- * Get the current account without advancing the index (sticky selection).
- *
- * @param {Array} accounts - Array of account objects
- * @param {number} currentIndex - Current account index
- * @param {Function} onSave - Callback to save changes
- * @param {string} [modelId] - Model ID to check rate limits for
- * @returns {{account: Object|null, newIndex: number}} The current account and index
- */
-export function getCurrentStickyAccount(accounts, currentIndex, onSave, modelId = null) {
-    clearExpiredLimits(accounts);
-
-    if (accounts.length === 0) {
-        return { account: null, newIndex: currentIndex };
-    }
-
-    // Clamp index to valid range
-    let index = currentIndex;
-    if (index >= accounts.length) {
-        index = 0;
-    }
-
-    // Get current account directly (activeIndex = current account)
-    const account = accounts[index];
-
-    if (isAccountUsable(account, modelId)) {
-        account.lastUsed = Date.now();
-        // Trigger save (don't await to avoid blocking)
-        if (onSave) onSave();
-        return { account, newIndex: index };
-    }
-
-    return { account: null, newIndex: index };
-}
-
-/**
- * Check if we should wait for the current account's rate limit to reset.
- *
- * @param {Array} accounts - Array of account objects
- * @param {number} currentIndex - Current account index
- * @param {string} [modelId] - Model ID to check rate limits for
- * @returns {{shouldWait: boolean, waitMs: number, account: Object|null}}
- */
-export function shouldWaitForCurrentAccount(accounts, currentIndex, modelId = null) {
-    if (accounts.length === 0) {
-        return { shouldWait: false, waitMs: 0, account: null };
-    }
-
-    // Clamp index to valid range
-    let index = currentIndex;
-    if (index >= accounts.length) {
-        index = 0;
-    }
-
-    // Get current account directly (activeIndex = current account)
-    const account = accounts[index];
-
-    if (!account || account.isInvalid) {
-        return { shouldWait: false, waitMs: 0, account: null };
-    }
-
-    let waitMs = 0;
-
-    // Check model-specific limit
-    if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
-        const limit = account.modelRateLimits[modelId];
-        if (limit.isRateLimited && limit.resetTime) {
-            waitMs = limit.resetTime - Date.now();
-        }
-    }
-
-    // If wait time is within threshold, recommend waiting
-    if (waitMs > 0 && waitMs <= MAX_WAIT_BEFORE_ERROR_MS) {
-        return { shouldWait: true, waitMs, account };
-    }
-
-    return { shouldWait: false, waitMs: 0, account };
-}
-
-/**
- * Pick an account with sticky selection preference.
- * Prefers the current account for cache continuity.
- *
- * @param {Array} accounts - Array of account objects
- * @param {number} currentIndex - Current account index
- * @param {Function} onSave - Callback to save changes
- * @param {string} [modelId] - Model ID to check rate limits for
- * @returns {{account: Object|null, waitMs: number, newIndex: number}}
- */
-export function pickStickyAccount(accounts, currentIndex, onSave, modelId = null) {
-    // First try to get the current sticky account
-    const { account: stickyAccount, newIndex: stickyIndex } = getCurrentStickyAccount(accounts, currentIndex, onSave, modelId);
-    if (stickyAccount) {
-        return { account: stickyAccount, waitMs: 0, newIndex: stickyIndex };
-    }
-
-    // Current account is rate-limited or invalid.
-    // CHECK IF OTHERS ARE AVAILABLE before deciding to wait.
-    const available = getAvailableAccounts(accounts, modelId);
-    if (available.length > 0) {
-        // Found a free account! Switch immediately.
-        const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave, modelId);
-        if (nextAccount) {
-            logger.info(`[AccountManager] Switched to new account (failover): ${nextAccount.email}`);
-            return { account: nextAccount, waitMs: 0, newIndex };
-        }
-    }
-
-    // No other accounts available. Now checking if we should wait for current account.
-    const waitInfo = shouldWaitForCurrentAccount(accounts, currentIndex, modelId);
-    if (waitInfo.shouldWait) {
-        logger.info(`[AccountManager] Waiting ${formatDuration(waitInfo.waitMs)} for sticky account: ${waitInfo.account.email}`);
-        return { account: null, waitMs: waitInfo.waitMs, newIndex: currentIndex };
-    }
-
-    // Current account unavailable for too long/invalid, and no others available?
-    const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave, modelId);
-    if (nextAccount) {
-        logger.info(`[AccountManager] Switched to new account for cache: ${nextAccount.email}`);
-    }
-    return { account: nextAccount, waitMs: 0, newIndex };
-}
--- a/src/account-manager/strategies/base-strategy.js
+++ b/src/account-manager/strategies/base-strategy.js
@@ -0,0 +1,104 @@
+/**
+ * Base Strategy
+ *
+ * Abstract base class defining the interface for account selection strategies.
+ * All strategies must implement the selectAccount method.
+ */
+
+/**
+ * @typedef {Object} SelectionResult
+ * @property {Object|null} account - The selected account or null if none available
+ * @property {number} index - The index of the selected account
+ * @property {number} [waitMs] - Optional wait time before account becomes available
+ */
+
+export class BaseStrategy {
+    /**
+     * Create a new BaseStrategy
+     * @param {Object} config - Strategy configuration
+     */
+    constructor(config = {}) {
+        if (new.target === BaseStrategy) {
+            throw new Error('BaseStrategy is abstract and cannot be instantiated directly');
+        }
+        this.config = config;
+    }
+
+    /**
+     * Select an account for a request
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - The model ID for the request
+     * @param {Object} options - Additional options
+     * @param {number} options.currentIndex - Current account index
+     * @param {string} [options.sessionId] - Session ID for cache continuity
+     * @param {Function} [options.onSave] - Callback to save changes
+     * @returns {SelectionResult} The selected account and index
+     */
+    selectAccount(accounts, modelId, options = {}) {
+        throw new Error('selectAccount must be implemented by subclass');
+    }
+
+    /**
+     * Called after a successful request
+     * @param {Object} account - The account that was used
+     * @param {string} modelId - The model ID that was used
+     */
+    onSuccess(account, modelId) {
+        // Default: no-op, override in subclass if needed
+    }
+
+    /**
+     * Called when a request is rate-limited
+     * @param {Object} account - The account that was rate-limited
+     * @param {string} modelId - The model ID that was rate-limited
+     */
+    onRateLimit(account, modelId) {
+        // Default: no-op, override in subclass if needed
+    }
+
+    /**
+     * Called when a request fails (non-rate-limit error)
+     * @param {Object} account - The account that failed
+     * @param {string} modelId - The model ID that failed
+     */
+    onFailure(account, modelId) {
+        // Default: no-op, override in subclass if needed
+    }
+
+    /**
+     * Check if an account is usable for a specific model
+     * @param {Object} account - Account object
+     * @param {string} modelId - Model ID to check
+     * @returns {boolean} True if account is usable
+     */
+    isAccountUsable(account, modelId) {
+        if (!account || account.isInvalid) return false;
+
+        // Skip disabled accounts
+        if (account.enabled === false) return false;
+
+        // Check model-specific rate limit
+        if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
+            const limit = account.modelRateLimits[modelId];
+            if (limit.isRateLimited && limit.resetTime > Date.now()) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Get all usable accounts for a model
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - Model ID to check
+     * @returns {Array} Array of usable accounts with their original indices
+     */
+    getUsableAccounts(accounts, modelId) {
+        return accounts
+            .map((account, index) => ({ account, index }))
+            .filter(({ account }) => this.isAccountUsable(account, modelId));
+    }
+}
+
+export default BaseStrategy;
--- a/src/account-manager/strategies/hybrid-strategy.js
+++ b/src/account-manager/strategies/hybrid-strategy.js
@@ -0,0 +1,195 @@
+/**
+ * Hybrid Strategy
+ *
+ * Smart selection based on health score, token bucket, and LRU freshness.
+ * Combines multiple signals for optimal account distribution.
+ *
+ * Scoring formula:
+ *   score = (Health × 2) + ((Tokens / MaxTokens × 100) × 5) + (LRU × 0.1)
+ *
+ * Filters accounts that are:
+ * - Not rate-limited
+ * - Not invalid or disabled
+ * - Health score >= minUsable
+ * - Has tokens available
+ */
+
+import { BaseStrategy } from './base-strategy.js';
+import { HealthTracker, TokenBucketTracker } from './trackers/index.js';
+import { logger } from '../../utils/logger.js';
+
+// Default weights for scoring
+const DEFAULT_WEIGHTS = {
+    health: 2,
+    tokens: 5,
+    lru: 0.1
+};
+
+export class HybridStrategy extends BaseStrategy {
+    #healthTracker;
+    #tokenBucketTracker;
+    #weights;
+
+    /**
+     * Create a new HybridStrategy
+     * @param {Object} config - Strategy configuration
+     * @param {Object} [config.healthScore] - Health tracker configuration
+     * @param {Object} [config.tokenBucket] - Token bucket configuration
+     * @param {Object} [config.weights] - Scoring weights
+     */
+    constructor(config = {}) {
+        super(config);
+        this.#healthTracker = new HealthTracker(config.healthScore || {});
+        this.#tokenBucketTracker = new TokenBucketTracker(config.tokenBucket || {});
+        this.#weights = { ...DEFAULT_WEIGHTS, ...config.weights };
+    }
+
+    /**
+     * Select an account based on combined health, tokens, and LRU score
+     *
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - The model ID for the request
+     * @param {Object} options - Additional options
+     * @returns {SelectionResult} The selected account and index
+     */
+    selectAccount(accounts, modelId, options = {}) {
+        const { onSave } = options;
+
+        if (accounts.length === 0) {
+            return { account: null, index: 0, waitMs: 0 };
+        }
+
+        // Get candidates that pass all filters
+        const candidates = this.#getCandidates(accounts, modelId);
+
+        if (candidates.length === 0) {
+            logger.debug('[HybridStrategy] No candidates available');
+            return { account: null, index: 0, waitMs: 0 };
+        }
+
+        // Score and sort candidates
+        const scored = candidates.map(({ account, index }) => ({
+            account,
+            index,
+            score: this.#calculateScore(account)
+        }));
+
+        scored.sort((a, b) => b.score - a.score);
+
+        // Select the best candidate
+        const best = scored[0];
+        best.account.lastUsed = Date.now();
+
+        // Consume a token from the bucket
+        this.#tokenBucketTracker.consume(best.account.email);
+
+        if (onSave) onSave();
+
+        const position = best.index + 1;
+        const total = accounts.length;
+        logger.info(`[HybridStrategy] Using account: ${best.account.email} (${position}/${total}, score: ${best.score.toFixed(1)})`);
+
+        return { account: best.account, index: best.index, waitMs: 0 };
+    }
+
+    /**
+     * Called after a successful request
+     */
+    onSuccess(account, modelId) {
+        if (account && account.email) {
+            this.#healthTracker.recordSuccess(account.email);
+        }
+    }
+
+    /**
+     * Called when a request is rate-limited
+     */
+    onRateLimit(account, modelId) {
+        if (account && account.email) {
+            this.#healthTracker.recordRateLimit(account.email);
+        }
+    }
+
+    /**
+     * Called when a request fails
+     */
+    onFailure(account, modelId) {
+        if (account && account.email) {
+            this.#healthTracker.recordFailure(account.email);
+            // Refund the token since the request didn't complete
+            this.#tokenBucketTracker.refund(account.email);
+        }
+    }
+
+    /**
+     * Get candidates that pass all filters
+     * @private
+     */
+    #getCandidates(accounts, modelId) {
+        return accounts
+            .map((account, index) => ({ account, index }))
+            .filter(({ account }) => {
+                // Basic usability check
+                if (!this.isAccountUsable(account, modelId)) {
+                    return false;
+                }
+
+                // Health score check
+                if (!this.#healthTracker.isUsable(account.email)) {
+                    return false;
+                }
+
+                // Token availability check
+                if (!this.#tokenBucketTracker.hasTokens(account.email)) {
+                    return false;
+                }
+
+                return true;
+            });
+    }
+
+    /**
+     * Calculate the combined score for an account
+     * @private
+     */
+    #calculateScore(account) {
+        const email = account.email;
+
+        // Health component (0-100 scaled by weight)
+        const health = this.#healthTracker.getScore(email);
+        const healthComponent = health * this.#weights.health;
+
+        // Token component (0-100 scaled by weight)
+        const tokens = this.#tokenBucketTracker.getTokens(email);
+        const maxTokens = this.#tokenBucketTracker.getMaxTokens();
+        const tokenRatio = tokens / maxTokens;
+        const tokenComponent = (tokenRatio * 100) * this.#weights.tokens;
+
+        // LRU component (older = higher score)
+        // Use time since last use, capped at 1 hour for scoring
+        const lastUsed = account.lastUsed || 0;
+        const timeSinceLastUse = Math.min(Date.now() - lastUsed, 3600000); // Cap at 1 hour
+        const lruMinutes = timeSinceLastUse / 60000;
+        const lruComponent = lruMinutes * this.#weights.lru;
+
+        return healthComponent + tokenComponent + lruComponent;
+    }
+
+    /**
+     * Get the health tracker (for testing/debugging)
+     * @returns {HealthTracker} The health tracker instance
+     */
+    getHealthTracker() {
+        return this.#healthTracker;
+    }
+
+    /**
+     * Get the token bucket tracker (for testing/debugging)
+     * @returns {TokenBucketTracker} The token bucket tracker instance
+     */
+    getTokenBucketTracker() {
+        return this.#tokenBucketTracker;
+    }
+}
+
+export default HybridStrategy;
--- a/src/account-manager/strategies/index.js
+++ b/src/account-manager/strategies/index.js
@@ -0,0 +1,85 @@
+/**
+ * Strategy Factory
+ *
+ * Creates and exports account selection strategy instances.
+ */
+
+import { StickyStrategy } from './sticky-strategy.js';
+import { RoundRobinStrategy } from './round-robin-strategy.js';
+import { HybridStrategy } from './hybrid-strategy.js';
+import { logger } from '../../utils/logger.js';
+import {
+    SELECTION_STRATEGIES,
+    DEFAULT_SELECTION_STRATEGY
+} from '../../constants.js';
+
+// Re-export strategy constants for convenience
+export const STRATEGY_NAMES = SELECTION_STRATEGIES;
+export const DEFAULT_STRATEGY = DEFAULT_SELECTION_STRATEGY;
+
+// Strategy display labels
+export const STRATEGY_LABELS = {
+    'sticky': 'Sticky (Cache Optimized)',
+    'round-robin': 'Round Robin (Load Balanced)',
+    'hybrid': 'Hybrid (Smart Distribution)'
+};
+
+/**
+ * Create a strategy instance
+ * @param {string} strategyName - Name of the strategy ('sticky', 'round-robin', 'hybrid')
+ * @param {Object} config - Strategy configuration
+ * @returns {BaseStrategy} The strategy instance
+ */
+export function createStrategy(strategyName, config = {}) {
+    const name = (strategyName || DEFAULT_STRATEGY).toLowerCase();
+
+    switch (name) {
+        case 'sticky':
+            logger.debug('[Strategy] Creating StickyStrategy');
+            return new StickyStrategy(config);
+
+        case 'round-robin':
+        case 'roundrobin':
+            logger.debug('[Strategy] Creating RoundRobinStrategy');
+            return new RoundRobinStrategy(config);
+
+        case 'hybrid':
+            logger.debug('[Strategy] Creating HybridStrategy');
+            return new HybridStrategy(config);
+
+        default:
+            logger.warn(`[Strategy] Unknown strategy "${strategyName}", falling back to ${DEFAULT_STRATEGY}`);
+            return new HybridStrategy(config);
+    }
+}
+
+/**
+ * Check if a strategy name is valid
+ * @param {string} name - Strategy name to check
+ * @returns {boolean} True if valid
+ */
+export function isValidStrategy(name) {
+    if (!name) return false;
+    const lower = name.toLowerCase();
+    return STRATEGY_NAMES.includes(lower) || lower === 'roundrobin';
+}
+
+/**
+ * Get the display label for a strategy
+ * @param {string} name - Strategy name
+ * @returns {string} Display label
+ */
+export function getStrategyLabel(name) {
+    const lower = (name || DEFAULT_STRATEGY).toLowerCase();
+    if (lower === 'roundrobin') return STRATEGY_LABELS['round-robin'];
+    return STRATEGY_LABELS[lower] || STRATEGY_LABELS[DEFAULT_STRATEGY];
+}
+
+// Re-export strategies for direct use
+export { StickyStrategy } from './sticky-strategy.js';
+export { RoundRobinStrategy } from './round-robin-strategy.js';
+export { HybridStrategy } from './hybrid-strategy.js';
+export { BaseStrategy } from './base-strategy.js';
+
+// Re-export trackers
+export { HealthTracker, TokenBucketTracker } from './trackers/index.js';
--- a/src/account-manager/strategies/round-robin-strategy.js
+++ b/src/account-manager/strategies/round-robin-strategy.js
@@ -0,0 +1,76 @@
+/**
+ * Round-Robin Strategy
+ *
+ * Rotates to the next account on every request for maximum throughput.
+ * Does not maintain cache continuity but maximizes concurrent requests.
+ */
+
+import { BaseStrategy } from './base-strategy.js';
+import { logger } from '../../utils/logger.js';
+
+export class RoundRobinStrategy extends BaseStrategy {
+    #cursor = 0; // Tracks current position in rotation
+
+    /**
+     * Create a new RoundRobinStrategy
+     * @param {Object} config - Strategy configuration
+     */
+    constructor(config = {}) {
+        super(config);
+    }
+
+    /**
+     * Select the next available account in rotation
+     *
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - The model ID for the request
+     * @param {Object} options - Additional options
+     * @returns {SelectionResult} The selected account and index
+     */
+    selectAccount(accounts, modelId, options = {}) {
+        const { onSave } = options;
+
+        if (accounts.length === 0) {
+            return { account: null, index: 0, waitMs: 0 };
+        }
+
+        // Clamp cursor to valid range
+        if (this.#cursor >= accounts.length) {
+            this.#cursor = 0;
+        }
+
+        // Start from the next position after the cursor
+        const startIndex = (this.#cursor + 1) % accounts.length;
+
+        // Try each account starting from startIndex
+        for (let i = 0; i < accounts.length; i++) {
+            const idx = (startIndex + i) % accounts.length;
+            const account = accounts[idx];
+
+            if (this.isAccountUsable(account, modelId)) {
+                account.lastUsed = Date.now();
+                this.#cursor = idx;
+
+                if (onSave) onSave();
+
+                const position = idx + 1;
+                const total = accounts.length;
+                logger.info(`[RoundRobinStrategy] Using account: ${account.email} (${position}/${total})`);
+
+                return { account, index: idx, waitMs: 0 };
+            }
+        }
+
+        // No usable accounts found
+        return { account: null, index: this.#cursor, waitMs: 0 };
+    }
+
+    /**
+     * Reset the cursor position
+     */
+    resetCursor() {
+        this.#cursor = 0;
+    }
+}
+
+export default RoundRobinStrategy;
--- a/src/account-manager/strategies/sticky-strategy.js
+++ b/src/account-manager/strategies/sticky-strategy.js
@@ -0,0 +1,138 @@
+/**
+ * Sticky Strategy
+ *
+ * Keeps using the same account until it becomes unavailable (rate-limited or invalid).
+ * Best for prompt caching as it maintains cache continuity across requests.
+ */
+
+import { BaseStrategy } from './base-strategy.js';
+import { logger } from '../../utils/logger.js';
+import { formatDuration } from '../../utils/helpers.js';
+import { MAX_WAIT_BEFORE_ERROR_MS } from '../../constants.js';
+
+export class StickyStrategy extends BaseStrategy {
+    /**
+     * Create a new StickyStrategy
+     * @param {Object} config - Strategy configuration, forwarded unchanged to BaseStrategy
+     */
+    constructor(config = {}) {
+        super(config);
+    }
+
+    /**
+     * Select an account with sticky preference.
+     *
+     * Decision order:
+     * 1. Keep the current account while it is usable (cache continuity).
+     * 2. Otherwise fail over immediately to the next usable account, if any.
+     * 3. Otherwise, if the current account's rate limit for this model resets
+     *    within MAX_WAIT_BEFORE_ERROR_MS, return a wait time and no account.
+     * 4. Otherwise make one final scan for any usable account.
+     *
+     * @param {Array} accounts - Array of account objects
+     * @param {string} modelId - The model ID for the request
+     * @param {Object} options - Additional options
+     * @param {number} [options.currentIndex=0] - Index of the current sticky account;
+     *   anything that is not a valid index into `accounts` is treated as 0
+     * @param {Function} [options.onSave] - Invoked after an account's lastUsed is updated
+     * @returns {SelectionResult} The selected account (or null), its index, and
+     *   waitMs (> 0 only when the caller should wait for the sticky account)
+     */
+    selectAccount(accounts, modelId, options = {}) {
+        const { currentIndex = 0, onSave } = options;
+
+        if (accounts.length === 0) {
+            return { account: null, index: currentIndex, waitMs: 0 };
+        }
+
+        // Clamp index to valid range. Negative, fractional and NaN values are
+        // rejected as well as too-large ones; otherwise accounts[index] would be
+        // undefined and the bogus index would be handed back to the caller.
+        const isValidIndex = Number.isInteger(currentIndex) &&
+            currentIndex >= 0 &&
+            currentIndex < accounts.length;
+        const index = isValidIndex ? currentIndex : 0;
+        const currentAccount = accounts[index];
+
+        // Check if current account is usable
+        if (this.isAccountUsable(currentAccount, modelId)) {
+            currentAccount.lastUsed = Date.now();
+            if (onSave) onSave();
+            return { account: currentAccount, index, waitMs: 0 };
+        }
+
+        // Current account is not usable - check if others are available
+        const usableAccounts = this.getUsableAccounts(accounts, modelId);
+
+        if (usableAccounts.length > 0) {
+            // Found a free account - switch immediately
+            const { account: nextAccount, index: nextIndex } = this.#pickNext(
+                accounts,
+                index,
+                modelId,
+                onSave
+            );
+            if (nextAccount) {
+                logger.info(`[StickyStrategy] Switched to new account (failover): ${nextAccount.email}`);
+                return { account: nextAccount, index: nextIndex, waitMs: 0 };
+            }
+        }
+
+        // No other accounts available - check if we should wait for current
+        const waitInfo = this.#shouldWaitForAccount(currentAccount, modelId);
+        if (waitInfo.shouldWait) {
+            logger.info(`[StickyStrategy] Waiting ${formatDuration(waitInfo.waitMs)} for sticky account: ${currentAccount.email}`);
+            return { account: null, index, waitMs: waitInfo.waitMs };
+        }
+
+        // Current account unavailable for too long, try to find any other
+        const { account: nextAccount, index: nextIndex } = this.#pickNext(
+            accounts,
+            index,
+            modelId,
+            onSave
+        );
+
+        return { account: nextAccount, index: nextIndex, waitMs: 0 };
+    }
+
+    /**
+     * Pick the next available account starting from after the current index.
+     * The scan wraps around and checks the current index last. The chosen
+     * account has lastUsed refreshed and onSave (if given) is invoked.
+     *
+     * @private
+     * @param {Array} accounts - Array of account objects
+     * @param {number} currentIndex - Index to start scanning after
+     * @param {string} modelId - The model ID for the request
+     * @param {Function} [onSave] - Invoked after the chosen account is updated
+     * @returns {{account: Object|null, index: number}} The chosen account and its
+     *   index, or a null account with the unchanged currentIndex
+     */
+    #pickNext(accounts, currentIndex, modelId, onSave) {
+        for (let i = 1; i <= accounts.length; i++) {
+            const idx = (currentIndex + i) % accounts.length;
+            const account = accounts[idx];
+
+            if (this.isAccountUsable(account, modelId)) {
+                account.lastUsed = Date.now();
+                if (onSave) onSave();
+
+                const position = idx + 1;
+                const total = accounts.length;
+                logger.info(`[StickyStrategy] Using account: ${account.email} (${position}/${total})`);
+
+                return { account, index: idx };
+            }
+        }
+
+        return { account: null, index: currentIndex };
+    }
+
+    /**
+     * Check if we should wait for an account's rate limit to reset.
+     * Waiting applies only when the account is valid and enabled and its rate
+     * limit for this model resets within MAX_WAIT_BEFORE_ERROR_MS.
+     *
+     * @private
+     * @param {Object} account - Account to inspect (may be null/undefined)
+     * @param {string} modelId - The model ID for the request
+     * @returns {{shouldWait: boolean, waitMs: number}} waitMs is the remaining
+     *   time until reset when shouldWait is true, otherwise 0
+     */
+    #shouldWaitForAccount(account, modelId) {
+        if (!account || account.isInvalid || account.enabled === false) {
+            return { shouldWait: false, waitMs: 0 };
+        }
+
+        let waitMs = 0;
+
+        if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
+            const limit = account.modelRateLimits[modelId];
+            if (limit.isRateLimited && limit.resetTime) {
+                waitMs = limit.resetTime - Date.now();
+            }
+        }
+
+        // Wait if within threshold
+        if (waitMs > 0 && waitMs <= MAX_WAIT_BEFORE_ERROR_MS) {
+            return { shouldWait: true, waitMs };
+        }
+
+        return { shouldWait: false, waitMs: 0 };
+    }
+}
+
+export default StickyStrategy;
--- a/src/account-manager/strategies/trackers/health-tracker.js
+++ b/src/account-manager/strategies/trackers/health-tracker.js
@@ -0,0 +1,162 @@
+/**
+ * Health Tracker
+ *
+ * Tracks per-account health scores to prioritize healthy accounts.
+ * Scores increase on success and decrease on failures/rate limits.
+ * Passive recovery over time helps accounts recover from temporary issues.
+ */
+
+// Default configuration (matches opencode-antigravity-auth)
+const DEFAULT_CONFIG = {
+    initial: 70,           // Starting score for new accounts
+    successReward: 1,      // Points on successful request
+    rateLimitPenalty: -10, // Points on rate limit (negative: added to the score)
+    failurePenalty: -20,   // Points on other failures (negative: added to the score)
+    recoveryPerHour: 2,    // Passive recovery rate
+    minUsable: 50,         // Minimum score to be selected
+    maxScore: 100          // Maximum score cap
+};
+
+export class HealthTracker {
+    #scores = new Map(); // email -> { score, lastUpdated, consecutiveFailures }
+    #config;
+
+    /**
+     * Create a new HealthTracker
+     * @param {Object} config - Health score configuration; any key omitted
+     *   falls back to DEFAULT_CONFIG
+     */
+    constructor(config = {}) {
+        this.#config = { ...DEFAULT_CONFIG, ...config };
+    }
+
+    /**
+     * Get the health score for an account
+     * @param {string} email - Account email
+     * @returns {number} Current health score (with passive recovery applied),
+     *   or the configured initial score for an account never recorded
+     */
+    getScore(email) {
+        const record = this.#scores.get(email);
+        if (!record) {
+            return this.#config.initial;
+        }
+
+        // Apply passive recovery based on time elapsed. Date.now() is wall-clock
+        // time and can move backwards (NTP or manual adjustment); elapsed time is
+        // floored at 0 so recovery can never subtract from the stored score.
+        const elapsedMs = Math.max(0, Date.now() - record.lastUpdated);
+        const hoursElapsed = elapsedMs / (1000 * 60 * 60);
+        const recovery = hoursElapsed * this.#config.recoveryPerHour;
+
+        return Math.min(this.#config.maxScore, record.score + recovery);
+    }
+
+    /**
+     * Record a successful request for an account.
+     * Adds successReward (capped at maxScore) and resets the failure streak.
+     * @param {string} email - Account email
+     */
+    recordSuccess(email) {
+        const currentScore = this.getScore(email);
+        const newScore = Math.min(
+            this.#config.maxScore,
+            currentScore + this.#config.successReward
+        );
+        this.#scores.set(email, {
+            score: newScore,
+            lastUpdated: Date.now(),
+            consecutiveFailures: 0 // Reset on success
+        });
+    }
+
+    /**
+     * Record a rate limit for an account.
+     * Applies rateLimitPenalty and extends the consecutive-failure streak.
+     * @param {string} email - Account email
+     */
+    recordRateLimit(email) {
+        this.#applyPenalty(email, this.#config.rateLimitPenalty);
+    }
+
+    /**
+     * Record a failure for an account.
+     * Applies failurePenalty and extends the consecutive-failure streak.
+     * @param {string} email - Account email
+     */
+    recordFailure(email) {
+        this.#applyPenalty(email, this.#config.failurePenalty);
+    }
+
+    /**
+     * Shared bookkeeping for rate limits and failures: add the (negative)
+     * penalty to the recovered score, floor at 0, bump the failure streak.
+     * @private
+     * @param {string} email - Account email
+     * @param {number} penalty - Signed score delta to add
+     */
+    #applyPenalty(email, penalty) {
+        const record = this.#scores.get(email);
+        const currentScore = this.getScore(email);
+        this.#scores.set(email, {
+            score: Math.max(0, currentScore + penalty),
+            lastUpdated: Date.now(),
+            consecutiveFailures: (record?.consecutiveFailures ?? 0) + 1
+        });
+    }
+
+    /**
+     * Check if an account is usable based on health score
+     * @param {string} email - Account email
+     * @returns {boolean} True if the account's score is at or above the minimum threshold
+     */
+    isUsable(email) {
+        return this.getScore(email) >= this.#config.minUsable;
+    }
+
+    /**
+     * Get the minimum usable score threshold
+     * @returns {number} Minimum score for an account to be usable
+     */
+    getMinUsable() {
+        return this.#config.minUsable;
+    }
+
+    /**
+     * Get the maximum score cap
+     * @returns {number} Maximum health score
+     */
+    getMaxScore() {
+        return this.#config.maxScore;
+    }
+
+    /**
+     * Reset the score for an account (e.g., after re-authentication)
+     * @param {string} email - Account email
+     */
+    reset(email) {
+        this.#scores.set(email, {
+            score: this.#config.initial,
+            lastUpdated: Date.now(),
+            consecutiveFailures: 0
+        });
+    }
+
+    /**
+     * Get the consecutive failure count for an account
+     * @param {string} email - Account email
+     * @returns {number} Number of consecutive failures (0 for unknown accounts)
+     */
+    getConsecutiveFailures(email) {
+        return this.#scores.get(email)?.consecutiveFailures ?? 0;
+    }
+
+    /**
+     * Clear all tracked scores
+     */
+    clear() {
+        this.#scores.clear();
+    }
+}
+
+export default HealthTracker;
--- a/src/account-manager/strategies/trackers/index.js
+++ b/src/account-manager/strategies/trackers/index.js
@@ -0,0 +1,8 @@
+/**
+ * Trackers Index
+ *
+ * Exports all tracker classes for account selection strategies.
+ */
+
+export { HealthTracker } from './health-tracker.js';
+export { TokenBucketTracker } from './token-bucket-tracker.js';
--- a/src/account-manager/strategies/trackers/token-bucket-tracker.js
+++ b/src/account-manager/strategies/trackers/token-bucket-tracker.js
@@ -0,0 +1,121 @@
+/**
+ * Token Bucket Tracker
+ *
+ * Client-side rate limiting using the token bucket algorithm.
+ * Each account has a bucket of tokens that regenerate over time.
+ * Requests consume tokens; accounts without tokens are deprioritized.
+ */
+
+// Default configuration (matches opencode-antigravity-auth)
+const DEFAULT_CONFIG = {
+    maxTokens: 50,        // Maximum token capacity
+    tokensPerMinute: 6,   // Regeneration rate
+    initialTokens: 50     // Starting tokens
+};
+
+export class TokenBucketTracker {
+    #buckets = new Map(); // email -> { tokens, lastUpdated }
+    #config;
+
+    /**
+     * Create a new TokenBucketTracker
+     * @param {Object} config - Token bucket configuration; any key omitted
+     *   falls back to DEFAULT_CONFIG
+     */
+    constructor(config = {}) {
+        this.#config = { ...DEFAULT_CONFIG, ...config };
+    }
+
+    /**
+     * Starting token count for a fresh or reset bucket.
+     * Capped at maxTokens so that overriding only maxTokens (e.g. { maxTokens: 10 })
+     * cannot report more tokens than the bucket can ever hold.
+     * @private
+     * @returns {number} Effective initial token count
+     */
+    #startingTokens() {
+        return Math.min(this.#config.maxTokens, this.#config.initialTokens);
+    }
+
+    /**
+     * Get the current token count for an account
+     * @param {string} email - Account email
+     * @returns {number} Current token count (with regeneration applied); may be fractional
+     */
+    getTokens(email) {
+        const bucket = this.#buckets.get(email);
+        if (!bucket) {
+            return this.#startingTokens();
+        }
+
+        // Apply token regeneration based on time elapsed. Date.now() is wall-clock
+        // time and can move backwards; elapsed time is floored at 0 so regeneration
+        // can never remove tokens.
+        const elapsedMs = Math.max(0, Date.now() - bucket.lastUpdated);
+        const minutesElapsed = elapsedMs / (1000 * 60);
+        const regenerated = minutesElapsed * this.#config.tokensPerMinute;
+
+        return Math.min(this.#config.maxTokens, bucket.tokens + regenerated);
+    }
+
+    /**
+     * Check if an account has tokens available
+     * @param {string} email - Account email
+     * @returns {boolean} True if account has at least 1 token
+     */
+    hasTokens(email) {
+        return this.getTokens(email) >= 1;
+    }
+
+    /**
+     * Consume a token from an account's bucket
+     * @param {string} email - Account email
+     * @returns {boolean} True if token was consumed, false if no tokens available
+     */
+    consume(email) {
+        const currentTokens = this.getTokens(email);
+        if (currentTokens < 1) {
+            return false;
+        }
+
+        this.#buckets.set(email, {
+            tokens: currentTokens - 1,
+            lastUpdated: Date.now()
+        });
+        return true;
+    }
+
+    /**
+     * Refund a token to an account's bucket (e.g., on request failure before processing).
+     * The bucket never exceeds maxTokens.
+     * @param {string} email - Account email
+     */
+    refund(email) {
+        const currentTokens = this.getTokens(email);
+        const newTokens = Math.min(
+            this.#config.maxTokens,
+            currentTokens + 1
+        );
+        this.#buckets.set(email, {
+            tokens: newTokens,
+            lastUpdated: Date.now()
+        });
+    }
+
+    /**
+     * Get the maximum token capacity
+     * @returns {number} Maximum tokens per bucket
+     */
+    getMaxTokens() {
+        return this.#config.maxTokens;
+    }
+
+    /**
+     * Reset the bucket for an account to its starting token count
+     * @param {string} email - Account email
+     */
+    reset(email) {
+        this.#buckets.set(email, {
+            tokens: this.#startingTokens(),
+            lastUpdated: Date.now()
+        });
+    }
+
+    /**
+     * Clear all tracked buckets
+     */
+    clear() {
+        this.#buckets.clear();
+    }
+}
+
+export default TokenBucketTracker;
--- a/src/cloudcode/message-handler.js
+++ b/src/cloudcode/message-handler.js
@@ -10,6 +10,11 @@ import {
    MAX_RETRIES,
    MAX_WAIT_BEFORE_ERROR_MS,
    DEFAULT_COOLDOWN_MS,
+    RATE_LIMIT_DEDUP_WINDOW_MS,
+    MAX_CONSECUTIVE_FAILURES,
+    EXTENDED_COOLDOWN_MS,
+    CAPACITY_RETRY_DELAY_MS,
+    MAX_CAPACITY_RETRIES,
    isThinkingModel
 } from '../constants.js';
 import { convertGoogleToAnthropic } from '../format/index.js';
@@ -21,6 +26,85 @@ import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
 import { parseThinkingSSEResponse } from './sse-parser.js';
 import { getFallbackModel } from '../fallback-config.js';

+/**
+ * Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits
+ * Tracks last rate limit timestamp per model to skip duplicate retries
+ *
+ * Values are Date.now() readings (epoch milliseconds). The map is keyed by
+ * model only, not by account. Entries are written by recordRateLimitTimestamp(),
+ * read by shouldSkipRetryDueToDedup(), removed by clearRateLimitTimestamp() and
+ * purged by the periodic cleanup timer further down in this module.
+ */
+const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
+
+/**
+ * Decide whether a wait-and-retry should be skipped because a rate limit was
+ * already recorded for this model within the deduplication window.
+ * @param {string} model - Model ID
+ * @returns {boolean} True when a recorded rate limit is younger than
+ *   RATE_LIMIT_DEDUP_WINDOW_MS; false when none is recorded or it is older
+ */
+function shouldSkipRetryDueToDedup(model) {
+    const recordedAt = lastRateLimitTimestamps.get(model);
+    if (!recordedAt) {
+        return false;
+    }
+
+    const elapsed = Date.now() - recordedAt;
+    const withinWindow = elapsed < RATE_LIMIT_DEDUP_WINDOW_MS;
+    if (withinWindow) {
+        logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${elapsed}ms ago), skipping retry`);
+    }
+    return withinWindow;
+}
+
+/**
+ * Remember the current time as the most recent rate limit seen for a model,
+ * overwriting any earlier entry.
+ * @param {string} model - Model ID
+ */
+function recordRateLimitTimestamp(model) {
+    const now = Date.now();
+    lastRateLimitTimestamps.set(model, now);
+}
+
+/**
+ * Forget the recorded rate limit timestamp for a model.
+ * sendMessage calls this after every successful response (not only after a
+ * retry), so later rate limits on the model are no longer treated as duplicates.
+ * Does nothing if no timestamp is recorded.
+ * @param {string} model - Model ID
+ */
+function clearRateLimitTimestamp(model) {
+    lastRateLimitTimestamps.delete(model);
+}
+
+/**
+ * Gap 3: Detect permanent authentication failures that require re-authentication.
+ * The caller marks the account invalid for these instead of only clearing caches.
+ * Matching is a case-insensitive substring search against a fixed marker list.
+ * @param {string} errorText - Error message from API (null/empty is treated as '')
+ * @returns {boolean} True if permanent auth failure
+ */
+function isPermanentAuthFailure(errorText) {
+    const permanentMarkers = [
+        'invalid_grant',
+        'token revoked',
+        'token has been expired or revoked',
+        'token_revoked',
+        'invalid_client',
+        'credentials are invalid'
+    ];
+    const haystack = (errorText || '').toLowerCase();
+    return permanentMarkers.some((marker) => haystack.includes(marker));
+}
+
+/**
+ * Gap 4: Detect if 429 error is due to model capacity (not user quota).
+ * The caller retries capacity errors on the same account and endpoint after a
+ * short delay. Matching is a case-insensitive substring search.
+ * @param {string} errorText - Error message from API (null/empty is treated as '')
+ * @returns {boolean} True if capacity exhausted (not quota)
+ */
+function isModelCapacityExhausted(errorText) {
+    const haystack = (errorText || '').toLowerCase();
+    const capacityMarkers = [
+        'model_capacity_exhausted',
+        'capacity_exhausted',
+        'model is currently overloaded',
+        'service temporarily unavailable'
+    ];
+
+    for (const marker of capacityMarkers) {
+        if (haystack.includes(marker)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Periodically clean up stale dedup timestamps (every 60 seconds).
+// - Entries are kept for at least the dedup window, so a cleanup pass can never
+//   drop a timestamp that shouldSkipRetryDueToDedup() would still honour.
+// - The timer is unref()'d so that merely importing this module (scripts, unit
+//   tests) does not keep the Node.js process alive.
+setInterval(() => {
+    const retentionMs = Math.max(60000, RATE_LIMIT_DEDUP_WINDOW_MS); // at least 1 minute
+    const cutoff = Date.now() - retentionMs;
+    for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
+        if (timestamp < cutoff) {
+            lastRateLimitTimestamps.delete(model);
+        }
+    }
+}, 60000).unref();
+
 /**
 * Send a non-streaming request to Cloud Code with multi-account support
 * Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
@@ -83,10 +167,14 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
            throw new Error('No accounts available');
        }

-        // Pick sticky account (prefers current for cache continuity)
-        let account = accountManager.getCurrentStickyAccount(model);
-        if (!account) {
-            account = accountManager.pickNext(model);
+        // Select account using configured strategy
+        const { account, waitMs } = accountManager.selectAccount(model);
+
+        // If strategy returns a wait time, sleep and retry
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
+            await sleep(waitMs + 500);
+            continue;
        }

        if (!account) {
@@ -101,11 +189,14 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab

            logger.debug(`[CloudCode] Sending request for model: ${model}`);

-            // Try each endpoint
+            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            let retriedOnce = false; // Track if we've already retried for short rate limit
+            let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
+            let endpointIndex = 0;

-            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
+            while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
+                const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    const url = isThinking
                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
@@ -122,16 +213,45 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                        logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
-                            // Auth error - clear caches and retry with fresh token
-                            logger.warn('[CloudCode] Auth error, refreshing token...');
+                            // Gap 3: Check for permanent auth failures
+                            if (isPermanentAuthFailure(errorText)) {
+                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
+                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
+                            }
+
+                            // Transient auth error - clear caches and retry with fresh token
+                            logger.warn('[CloudCode] Transient auth error, refreshing token...');
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
+                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);

+                            // Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
+                            if (isModelCapacityExhausted(errorText)) {
+                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                    capacityRetryCount++;
+                                    const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
+                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                    await sleep(waitMs);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
+                                }
+                                // Max capacity retries exceeded - treat as quota exhaustion
+                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
+                            }
+
+                            // Gap 1: Check deduplication window to prevent thundering herd
+                            if (shouldSkipRetryDueToDedup(model)) {
+                                logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
+                                accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
+                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
+                            }
+
                            // Decision: wait and retry OR switch account
                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
                                // Long-term quota exhaustion (> 10s) - switch to next account
@@ -144,31 +264,11 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab

                                if (!retriedOnce) {
                                    retriedOnce = true;
+                                    recordRateLimitTimestamp(model); // Gap 1: Record before retry
                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
                                    await sleep(waitMs);
-                                    // Retry same endpoint
-                                    const retryResponse = await fetch(url, {
-                                        method: 'POST',
-                                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
-                                        body: JSON.stringify(payload)
-                                    });
-
-                                    if (retryResponse.ok) {
-                                        // Process retry response
-                                        if (isThinking) {
-                                            return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model);
-                                        }
-                                        const data = await retryResponse.json();
-                                        logger.debug('[CloudCode] Response received after retry');
-                                        return convertGoogleToAnthropic(data, anthropicRequest.model);
-                                    }
-
-                                    // Retry also failed - parse new reset time
-                                    const retryErrorText = await retryResponse.text();
-                                    const retryResetMs = parseResetTime(retryResponse, retryErrorText);
-                                    logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
-                                    accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
-                                    throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
                                } else {
                                    // Already retried once, mark and switch
                                    accountManager.markRateLimited(account.email, waitMs, model);
@@ -184,18 +284,26 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                                logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
                                await sleep(1000);
                            }
+                            endpointIndex++;
                            continue;
                        }
                    }

                    // For thinking models, parse SSE and accumulate all parts
                    if (isThinking) {
-                        return await parseThinkingSSEResponse(response, anthropicRequest.model);
+                        const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
+                        // Gap 1: Clear timestamp on success
+                        clearRateLimitTimestamp(model);
+                        accountManager.notifySuccess(account, model);
+                        return result;
                    }

                    // Non-thinking models use regular JSON
                    const data = await response.json();
                    logger.debug('[CloudCode] Response received');
+                    // Gap 1: Clear timestamp on success
+                    clearRateLimitTimestamp(model);
+                    accountManager.notifySuccess(account, model);
                    return convertGoogleToAnthropic(data, anthropicRequest.model);

                } catch (endpointError) {
@@ -204,6 +312,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                    }
                    logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
+                    endpointIndex++;
                }
            }

@@ -219,7 +328,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab

        } catch (error) {
            if (isRateLimitError(error)) {
-                // Rate limited - already marked, continue to next account
+                // Rate limited - already marked, notify strategy and continue to next account
+                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
@@ -230,15 +340,31 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
            }
            // Handle 5xx errors
            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
-                logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
-                accountManager.pickNext(model);
+                accountManager.notifyFailure(account, model);
+
+                // Gap 2: Check consecutive failures for extended cooldown
+                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
+                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
+                }
                continue;
            }

            if (isNetworkError(error)) {
-                logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
+                accountManager.notifyFailure(account, model);
+
+                // Gap 2: Check consecutive failures for extended cooldown
+                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
+                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
+                }
                await sleep(1000);
-                accountManager.pickNext(model);
                continue;
            }

--- a/src/cloudcode/streaming-handler.js
+++ b/src/cloudcode/streaming-handler.js
@@ -10,7 +10,12 @@ import {
    MAX_RETRIES,
    MAX_EMPTY_RESPONSE_RETRIES,
    MAX_WAIT_BEFORE_ERROR_MS,
-    DEFAULT_COOLDOWN_MS
+    DEFAULT_COOLDOWN_MS,
+    RATE_LIMIT_DEDUP_WINDOW_MS,
+    MAX_CONSECUTIVE_FAILURES,
+    EXTENDED_COOLDOWN_MS,
+    CAPACITY_RETRY_DELAY_MS,
+    MAX_CAPACITY_RETRIES
 } from '../constants.js';
 import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
 import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
@@ -21,6 +26,83 @@ import { streamSSEResponse } from './sse-streamer.js';
 import { getFallbackModel } from '../fallback-config.js';
 import crypto from 'crypto';

+/**
+ * Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits
+ * Tracks last rate limit timestamp per model to skip duplicate retries
+ *
+ * Module-level state: a single Map shared by every call made through this module.
+ * Values are Date.now() readings (epoch milliseconds) written by recordRateLimitTimestamp(),
+ * read by shouldSkipRetryDueToDedup(), and removed by clearRateLimitTimestamp() or by the
+ * periodic cleanup interval defined further down in this file.
+ * The key is the model ID only, so an entry is not tied to any particular account.
+ */
+const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
+
+/**
+ * Tell the caller whether this model hit a rate limit so recently that another
+ * wait-and-retry should be skipped
+ * @param {string} model - Model ID
+ * @returns {boolean} True when a timestamp is recorded for the model and it is
+ *   less than RATE_LIMIT_DEDUP_WINDOW_MS old; false otherwise
+ */
+function shouldSkipRetryDueToDedup(model) {
+    const recordedAt = lastRateLimitTimestamps.get(model);
+    if (recordedAt) {
+        // Only a recorded timestamp can fall inside the dedup window
+        const ageMs = Date.now() - recordedAt;
+        if (ageMs < RATE_LIMIT_DEDUP_WINDOW_MS) {
+            logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${ageMs}ms ago), skipping retry`);
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * Stamp the model with the current time so later dedup checks can see that a
+ * rate limit was just observed for it (overwrites any earlier stamp)
+ * @param {string} model - Model ID
+ */
+function recordRateLimitTimestamp(model) {
+    const now = Date.now();
+    lastRateLimitTimestamps.set(model, now);
+}
+
+/**
+ * Forget the recorded rate limit for a model (called once a request succeeds);
+ * a model with no recorded timestamp is left untouched
+ * @param {string} model - Model ID
+ */
+function clearRateLimitTimestamp(model) {
+    if (lastRateLimitTimestamps.has(model)) {
+        lastRateLimitTimestamps.delete(model);
+    }
+}
+
+/**
+ * Gap 3: Recognise auth errors that require the user to re-authenticate
+ * Matching is a case-insensitive substring search; null, undefined and empty
+ * text never match
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if any permanent-failure marker occurs in the text
+ */
+function isPermanentAuthFailure(errorText) {
+    const haystack = (errorText || '').toLowerCase();
+    const permanentMarkers = [
+        'invalid_grant',
+        'token revoked',
+        'token has been expired or revoked',
+        'token_revoked',
+        'invalid_client',
+        'credentials are invalid'
+    ];
+    return permanentMarkers.some((marker) => haystack.includes(marker));
+}
+
+/**
+ * Gap 4: Tell a model-capacity 429 apart from a user-quota 429
+ * Matching is a case-insensitive substring search; null, undefined and empty
+ * text never match
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if the text names a capacity problem (not quota)
+ */
+function isModelCapacityExhausted(errorText) {
+    const haystack = (errorText || '').toLowerCase();
+    const capacityMarkers = [
+        'model_capacity_exhausted',
+        'capacity_exhausted',
+        'model is currently overloaded',
+        'service temporarily unavailable'
+    ];
+    return capacityMarkers.some((marker) => haystack.includes(marker));
+}
+
+// Periodically clean up stale dedup timestamps (every 60 seconds).
+// An entry is kept for at least RATE_LIMIT_DEDUP_WINDOW_MS, so a configured
+// window longer than one minute is not cut short by this sweep.
+// unref() stops this module-level timer from keeping the Node.js process alive
+// on its own, so scripts and tests that merely import this module can exit.
+setInterval(() => {
+    const maxAgeMs = Math.max(60000, RATE_LIMIT_DEDUP_WINDOW_MS);
+    const cutoff = Date.now() - maxAgeMs;
+    for (const [model, timestamp] of lastRateLimitTimestamps) {
+        if (timestamp < cutoff) {
+            lastRateLimitTimestamps.delete(model);
+        }
+    }
+}, 60000).unref();
+
 /**
 * Send a streaming request to Cloud Code with multi-account support
 * Streams events in real-time as they arrive from the server
@@ -83,10 +165,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
            throw new Error('No accounts available');
        }

-        // Pick sticky account (prefers current for cache continuity)
-        let account = accountManager.getCurrentStickyAccount(model);
-        if (!account) {
-            account = accountManager.pickNext(model);
+        // Select account using configured strategy
+        const { account, waitMs } = accountManager.selectAccount(model);
+
+        // If strategy returns a wait time, sleep and retry
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
+            await sleep(waitMs + 500);
+            continue;
        }

        if (!account) {
@@ -101,11 +187,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb

            logger.debug(`[CloudCode] Starting stream for model: ${model}`);

-            // Try each endpoint for streaming
+            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            let retriedOnce = false; // Track if we've already retried for short rate limit
+            let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
+            let endpointIndex = 0;

-            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
+            while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
+                const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;

@@ -120,15 +209,44 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                        logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
-                            // Auth error - clear caches and retry
+                            // Gap 3: Check for permanent auth failures
+                            if (isPermanentAuthFailure(errorText)) {
+                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
+                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
+                            }
+
+                            // Transient auth error - clear caches and retry
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
+                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);

+                            // Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
+                            if (isModelCapacityExhausted(errorText)) {
+                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                    capacityRetryCount++;
+                                    const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
+                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                    await sleep(waitMs);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
+                                }
+                                // Max capacity retries exceeded - treat as quota exhaustion
+                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
+                            }
+
+                            // Gap 1: Check deduplication window to prevent thundering herd
+                            if (shouldSkipRetryDueToDedup(model)) {
+                                logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
+                                accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
+                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
+                            }
+
                            // Decision: wait and retry OR switch account
                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
                                // Long-term quota exhaustion (> 10s) - switch to next account
@@ -141,28 +259,11 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb

                                if (!retriedOnce) {
                                    retriedOnce = true;
+                                    recordRateLimitTimestamp(model); // Gap 1: Record before retry
                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
                                    await sleep(waitMs);
-                                    // Retry same endpoint
-                                    const retryResponse = await fetch(url, {
-                                        method: 'POST',
-                                        headers: buildHeaders(token, model, 'text/event-stream'),
-                                        body: JSON.stringify(payload)
-                                    });
-
-                                    if (retryResponse.ok) {
-                                        // Stream the retry response
-                                        yield* streamSSEResponse(retryResponse, anthropicRequest.model);
-                                        logger.debug('[CloudCode] Stream completed after retry');
-                                        return;
-                                    }
-
-                                    // Retry also failed - parse new reset time
-                                    const retryErrorText = await retryResponse.text();
-                                    const retryResetMs = parseResetTime(retryResponse, retryErrorText);
-                                    logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
-                                    accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
-                                    throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
                                } else {
                                    // Already retried once, mark and switch
                                    accountManager.markRateLimited(account.email, waitMs, model);
@@ -179,6 +280,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                            await sleep(1000);
                        }

+                        endpointIndex++;
                        continue;
                    }

@@ -189,6 +291,9 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                        try {
                            yield* streamSSEResponse(currentResponse, anthropicRequest.model);
                            logger.debug('[CloudCode] Stream completed');
+                            // Gap 1: Clear timestamp on success
+                            clearRateLimitTimestamp(model);
+                            accountManager.notifySuccess(account, model);
                            return;
                        } catch (streamError) {
                            // Only retry on EmptyResponseError
@@ -226,8 +331,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                                    throw new Error(`429 RESOURCE_EXHAUSTED during retry: ${retryErrorText}`);
                                }

-                                // Auth error - clear caches and throw with recognizable message
+                                // Auth error - check for permanent failure
                                if (currentResponse.status === 401) {
+                                    if (isPermanentAuthFailure(retryErrorText)) {
+                                        logger.error(`[CloudCode] Permanent auth failure during retry for ${account.email}`);
+                                        accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                        throw new Error(`AUTH_INVALID_PERMANENT: ${retryErrorText}`);
+                                    }
                                    accountManager.clearTokenCache(account.email);
                                    accountManager.clearProjectCache(account.email);
                                    throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
@@ -261,6 +371,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                    }
                    logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
+                    endpointIndex++;
                }
            }

@@ -276,7 +387,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb

        } catch (error) {
            if (isRateLimitError(error)) {
-                // Rate limited - already marked, continue to next account
+                // Rate limited - already marked, notify strategy and continue to next account
+                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
@@ -287,15 +399,31 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
            }
            // Handle 5xx errors
            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
-                logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
-                accountManager.pickNext(model);
+                accountManager.notifyFailure(account, model);
+
+                // Gap 2: Check consecutive failures for extended cooldown
+                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
+                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
+                }
                continue;
            }

            if (isNetworkError(error)) {
-                logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
+                accountManager.notifyFailure(account, model);
+
+                // Gap 2: Check consecutive failures for extended cooldown
+                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
+                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
+                }
                await sleep(1000);
-                accountManager.pickNext(model);
                continue;
            }

--- a/src/config.js
+++ b/src/config.js
@@ -15,7 +15,26 @@ const DEFAULT_CONFIG = {
    persistTokenCache: false,
    defaultCooldownMs: 10000,  // 10 seconds
    maxWaitBeforeErrorMs: 120000, // 2 minutes
-    modelMapping: {}
+    modelMapping: {},
+    // Account selection strategy configuration
+    accountSelection: {
+        strategy: 'hybrid',           // 'sticky' | 'round-robin' | 'hybrid'
+        // Hybrid strategy tuning (optional - sensible defaults)
+        healthScore: {
+            initial: 70,              // Starting score for new accounts
+            successReward: 1,         // Points on successful request
+            rateLimitPenalty: -10,    // Points on rate limit
+            failurePenalty: -20,      // Points on other failures
+            recoveryPerHour: 2,       // Passive recovery rate
+            minUsable: 50,            // Minimum score to be selected
+            maxScore: 100             // Maximum score cap
+        },
+        tokenBucket: {
+            maxTokens: 50,            // Maximum token capacity
+            tokensPerMinute: 6,       // Regeneration rate
+            initialTokens: 50         // Starting tokens
+        }
+    }
 };

 // Config locations
--- a/src/constants.js
+++ b/src/constants.js
@@ -103,9 +103,24 @@ export const MAX_ACCOUNTS = config?.maxAccounts || 10; // From config or 10
 // Rate limit wait thresholds
 export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs || 120000; // From config or 2 minutes

+// Gap 1: Retry deduplication - prevents thundering herd on concurrent rate limits
+// Each constant below reads its own config key; because of `||`, a missing or
+// falsy setting (0 included) yields the default shown on that line.
+export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 5000; // 5 seconds
+
+// Gap 2: Consecutive failure tracking - extended cooldown after repeated failures
+// MAX_CONSECUTIVE_FAILURES is a count; EXTENDED_COOLDOWN_MS is a duration in milliseconds
+export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures || 3;
+export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs || 60000; // 1 minute
+
+// Gap 4: Capacity exhaustion - shorter retry for model capacity issues (not quota)
+// CAPACITY_RETRY_DELAY_MS is a duration in milliseconds; MAX_CAPACITY_RETRIES is a count
+export const CAPACITY_RETRY_DELAY_MS = config?.capacityRetryDelayMs || 2000; // 2 seconds
+export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 3;
+
 // Thinking model constants
 export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length

+// Account selection strategies
+// SELECTION_STRATEGIES lists the accepted strategy identifiers;
+// DEFAULT_SELECTION_STRATEGY must stay one of the entries in that list
+export const SELECTION_STRATEGIES = ['sticky', 'round-robin', 'hybrid'];
+export const DEFAULT_SELECTION_STRATEGY = 'hybrid';
+
 // Gemini-specific limits
 export const GEMINI_MAX_OUTPUT_TOKENS = 16384;

@@ -235,6 +250,11 @@ export default {
    MAX_EMPTY_RESPONSE_RETRIES,
    MAX_ACCOUNTS,
    MAX_WAIT_BEFORE_ERROR_MS,
+    RATE_LIMIT_DEDUP_WINDOW_MS,
+    MAX_CONSECUTIVE_FAILURES,
+    EXTENDED_COOLDOWN_MS,
+    CAPACITY_RETRY_DELAY_MS,
+    MAX_CAPACITY_RETRIES,
    MIN_SIGNATURE_LENGTH,
    GEMINI_MAX_OUTPUT_TOKENS,
    GEMINI_SKIP_SIGNATURE,
--- a/src/errors.js
+++ b/src/errors.js
@@ -149,6 +149,23 @@ export class EmptyResponseError extends AntigravityError {
    }
 }

+/**
+ * Raised when the upstream model itself is overloaded (a capacity problem on
+ * Google's side) rather than the user's own daily/hourly quota being used up
+ * (QUOTA_EXHAUSTED). Intended handling: retry on the same account after a
+ * short delay instead of switching accounts straight away
+ */
+export class CapacityExhaustedError extends AntigravityError {
+    /**
+     * @param {string} message - Error message
+     * @param {number|null} retryAfterMs - Suggested retry delay in ms
+     */
+    constructor(message = 'Model capacity exhausted', retryAfterMs = null) {
+        // The retry hint goes to the base class and is also exposed directly on the instance
+        const metadata = { retryAfterMs };
+        super(message, 'CAPACITY_EXHAUSTED', true, metadata);
+        this.name = 'CapacityExhaustedError';
+        this.retryAfterMs = retryAfterMs;
+    }
+}
+
 /**
 * Check if an error is a rate limit error
 * Works with both custom error classes and legacy string-based errors
@@ -188,6 +205,22 @@ export function isEmptyResponseError(error) {
        error?.name === 'EmptyResponseError';
 }

+/**
+ * Check if an error is a capacity exhausted error (model overload, not user quota)
+ * This is different from quota exhaustion - capacity issues are temporary infrastructure
+ * limits that should be retried on the SAME account with shorter delays
+ *
+ * Accepts CapacityExhaustedError instances as well as legacy string-based errors whose
+ * message contains one of the known capacity markers (case-insensitive).
+ * Null-safe like isEmptyResponseError: null/undefined, or a value whose `message`
+ * is not a string, returns false instead of throwing.
+ * @param {Error} error - Error to check
+ * @returns {boolean}
+ */
+export function isCapacityExhaustedError(error) {
+    if (error instanceof CapacityExhaustedError) return true;
+    const rawMessage = error?.message;
+    if (typeof rawMessage !== 'string') return false;
+    const msg = rawMessage.toLowerCase();
+    return msg.includes('model_capacity_exhausted') ||
+        msg.includes('capacity_exhausted') ||
+        msg.includes('model is currently overloaded') ||
+        msg.includes('service temporarily unavailable');
+}
+
 export default {
    AntigravityError,
    RateLimitError,
@@ -197,7 +230,9 @@ export default {
    ApiError,
    NativeModuleError,
    EmptyResponseError,
+    CapacityExhaustedError,
    isRateLimitError,
    isAuthError,
-    isEmptyResponseError
+    isEmptyResponseError,
+    isCapacityExhaustedError
 };
--- a/src/index.js
+++ b/src/index.js
@@ -3,9 +3,10 @@
 * Entry point - starts the proxy server
 */

-import app from './server.js';
+import app, { accountManager } from './server.js';
 import { DEFAULT_PORT } from './constants.js';
 import { logger } from './utils/logger.js';
+import { getStrategyLabel, STRATEGY_NAMES, DEFAULT_STRATEGY } from './account-manager/strategies/index.js';
 import path from 'path';
 import os from 'os';

@@ -14,6 +15,21 @@ const args = process.argv.slice(2);
 const isDebug = args.includes('--debug') || process.env.DEBUG === 'true';
 const isFallbackEnabled = args.includes('--fallback') || process.env.FALLBACK === 'true';

+// Read the optional --strategy flag; both "--strategy=sticky" and "--strategy sticky"
+// are accepted. When the flag appears more than once, the last occurrence wins.
+let strategyOverride = null;
+args.forEach((arg, idx) => {
+    const following = args[idx + 1];
+    if (arg.startsWith('--strategy=')) {
+        strategyOverride = arg.split('=')[1];
+    } else if (arg === '--strategy' && following) {
+        strategyOverride = following;
+    }
+});
+// Discard an unrecognised strategy name (the comparison is case-insensitive)
+if (strategyOverride) {
+    const normalized = strategyOverride.toLowerCase();
+    if (!STRATEGY_NAMES.includes(normalized)) {
+        logger.warn(`[Startup] Invalid strategy "${strategyOverride}". Valid options: ${STRATEGY_NAMES.join(', ')}. Using default.`);
+        strategyOverride = null;
+    }
+}
+
 // Initialize logger
 logger.setDebug(isDebug);

@@ -45,6 +61,7 @@ const server = app.listen(PORT, () => {
    
    // Build Control section dynamically
    let controlSection = '║  Control:                                                    ║\n';
+    controlSection += '║    --strategy=<s>     Set selection strategy (sticky/hybrid) ║\n';
    if (!isDebug) {
        controlSection += '║    --debug            Enable debug logging                   ║\n';
    }
@@ -53,17 +70,18 @@ const server = app.listen(PORT, () => {
    }
    controlSection += '║    Ctrl+C             Stop server                            ║';

-    // Build status section if any modes are active
-    let statusSection = '';
-    if (isDebug || isFallbackEnabled) {
-        statusSection = '║                                                              ║\n';
-        statusSection += '║  Active Modes:                                               ║\n';
-        if (isDebug) {
-            statusSection += '║    ✓ Debug mode enabled                                      ║\n';
-        }
-        if (isFallbackEnabled) {
-            statusSection += '║    ✓ Model fallback enabled                                  ║\n';
-        }
+    // Get the strategy label (accountManager will be initialized by now)
+    const strategyLabel = accountManager.getStrategyLabel();
+
+    // Build status section - always show strategy, plus any active modes
+    let statusSection = '║                                                              ║\n';
+    statusSection += '║  Active Modes:                                               ║\n';
+    statusSection += `${border}    ${align4(`✓ Strategy: ${strategyLabel}`)}${border}\n`;
+    if (isDebug) {
+        statusSection += '║    ✓ Debug mode enabled                                      ║\n';
+    }
+    if (isFallbackEnabled) {
+        statusSection += '║    ✓ Model fallback enabled                                  ║\n';
    }

    logger.log(`
--- a/src/server.js
+++ b/src/server.js
@@ -26,13 +26,23 @@ import usageStats from './modules/usage-stats.js';
 const args = process.argv.slice(2);
 const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true';

+// Look for a --strategy override on the command line ("--strategy=sticky" or
+// "--strategy sticky"); the last occurrence wins and the value is kept as typed
+let STRATEGY_OVERRIDE = null;
+for (const [position, arg] of args.entries()) {
+    if (arg.startsWith('--strategy=')) {
+        STRATEGY_OVERRIDE = arg.split('=')[1];
+        continue;
+    }
+    if (arg === '--strategy' && args[position + 1]) {
+        STRATEGY_OVERRIDE = args[position + 1];
+    }
+}
+
 const app = express();

 // Disable x-powered-by header for security
 app.disable('x-powered-by');

 // Initialize account manager (will be fully initialized on first request or startup)
-const accountManager = new AccountManager();
+export const accountManager = new AccountManager();

 // Track initialization status
 let isInitialized = false;
@@ -50,7 +60,7 @@ async function ensureInitialized() {

    initPromise = (async () => {
        try {
-            await accountManager.initialize();
+            await accountManager.initialize(STRATEGY_OVERRIDE);
            isInitialized = true;
            const status = accountManager.getStatus();
            logger.success(`[Server] Account pool initialized: ${status.summary}`);
--- a/src/webui/index.js
+++ b/src/webui/index.js
@@ -282,7 +282,7 @@ export function mountWebUI(app, dirname, accountManager) {
     */
    app.post('/api/config', (req, res) => {
        try {
-            const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs } = req.body;
+            const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, accountSelection } = req.body;

            // Only allow updating specific fields (security)
            const updates = {};
@@ -308,6 +308,16 @@ export function mountWebUI(app, dirname, accountManager) {
            if (typeof maxWaitBeforeErrorMs === 'number' && maxWaitBeforeErrorMs >= 0 && maxWaitBeforeErrorMs <= 600000) {
                updates.maxWaitBeforeErrorMs = maxWaitBeforeErrorMs;
            }
+            // Account selection strategy validation
+            if (accountSelection && typeof accountSelection === 'object') {
+                const validStrategies = ['sticky', 'round-robin', 'hybrid'];
+                if (accountSelection.strategy && validStrategies.includes(accountSelection.strategy)) {
+                    updates.accountSelection = {
+                        ...(config.accountSelection || {}),
+                        strategy: accountSelection.strategy
+                    };
+                }
+            }

            if (Object.keys(updates).length === 0) {
                return res.status(400).json({