feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)

This commit addresses "Max retries exceeded" errors during stress testing where
all accounts would become exhausted simultaneously due to short per-second rate
limits triggering cascading failures.

## Rate Limit Parser (`rate-limit-parser.js`)
- Remove 2s buffer enforcement that caused cascading failures when API returned
  short reset times (200-600ms). Now adds 200ms buffer for sub-500ms resets
- Add `parseRateLimitReason()` for smart backoff based on error type:
  QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR

## Message/Streaming Handlers
- Add per-account+model rate limit state tracking with exponential backoff
- For short rate limits (< 1 second), wait and retry on same account instead
  of switching - prevents thundering herd when all accounts hit per-second limits
- Add throttle wait support for fallback modes (emergency/lastResort)
- Add `calculateSmartBackoff()` with progressive tiers by error type

## HybridStrategy (`hybrid-strategy.js`)
- Refactor `#getCandidates()` to return 4 fallback levels:
  - `normal`: All filters pass (health, tokens, quota)
  - `quota`: Bypass critical quota check
  - `emergency`: Bypass health check when ALL accounts unhealthy
  - `lastResort`: Bypass BOTH health AND token bucket checks
- Add throttle wait times: 500ms for lastResort, 250ms for emergency
- Fix LRU calculation to use seconds (matches opencode-antigravity-auth)

## Health Tracker
- Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours)

## Account Manager
- Add consecutive failure tracking: `getConsecutiveFailures()`,
  `incrementConsecutiveFailures()`, `resetConsecutiveFailures()`
- Add cooldown mechanism separate from rate limits with `CooldownReason`
- Reset consecutive failures on successful request

## Base Strategy
- Add `isAccountCoolingDown()` check in `isAccountUsable()`

## Constants
- Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS`
- Add `BACKOFF_BY_ERROR_TYPE` for smart backoff
- Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff
- Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops
- Increase `MAX_CAPACITY_RETRIES` from 3 to 5
- Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s

## Frontend
- Remove `capacityRetryDelayMs` config (replaced by progressive tiers)
- Update default `maxCapacityRetries` display from 3 to 5

## Testing
- Add `tests/stress-test.cjs` for concurrent request stress testing

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-24 22:43:53 +05:30
parent 71b9b001fd
commit 5a85f0cfcc
20 changed files with 869 additions and 244 deletions

View File

@@ -274,12 +274,6 @@ window.Components.serverConfig = () => ({
(v) => window.Validators.validateTimeout(v, EXTENDED_COOLDOWN_MIN, EXTENDED_COOLDOWN_MAX));
},
toggleCapacityRetryDelayMs(value) {
const { CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX } = window.AppConstants.VALIDATION;
this.saveConfigField('capacityRetryDelayMs', value, 'Capacity Retry Delay',
(v) => window.Validators.validateTimeout(v, CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX));
},
toggleMaxCapacityRetries(value) {
const { MAX_CAPACITY_RETRIES_MIN, MAX_CAPACITY_RETRIES_MAX } = window.AppConstants.VALIDATION;
this.saveConfigField('maxCapacityRetries', value, 'Max Capacity Retries',

View File

@@ -85,10 +85,6 @@ window.AppConstants.VALIDATION = {
EXTENDED_COOLDOWN_MIN: 10000,
EXTENDED_COOLDOWN_MAX: 300000,
// Capacity retry delay (500ms - 10 seconds)
CAPACITY_RETRY_DELAY_MIN: 500,
CAPACITY_RETRY_DELAY_MAX: 10000,
// Capacity retries (1 - 10)
MAX_CAPACITY_RETRIES_MIN: 1,
MAX_CAPACITY_RETRIES_MAX: 10

View File

@@ -245,8 +245,6 @@ window.translations.en = {
maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
extendedCooldown: "Extended Cooldown",
extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
capacityRetryDelay: "Capacity Retry Delay",
capacityRetryDelayDesc: "Delay before retrying when model capacity is exhausted (not quota).",
maxCapacityRetries: "Max Capacity Retries",
maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
saveConfigServer: "Save Configuration",

View File

@@ -278,8 +278,6 @@ window.translations.id = {
maxConsecutiveFailuresDesc: "Jumlah kegagalan berturut-turut sebelum menerapkan cooldown diperpanjang.",
extendedCooldown: "Cooldown Diperpanjang",
extendedCooldownDesc: "Durasi cooldown setelah mencapai maks. kegagalan berturut-turut.",
capacityRetryDelay: "Jeda Retry Kapasitas",
capacityRetryDelayDesc: "Jeda sebelum retry saat kapasitas model habis (bukan kuota).",
maxCapacityRetries: "Maks. Retry Kapasitas",
maxCapacityRetriesDesc: "Maksimum retry untuk kehabisan kapasitas sebelum ganti akun.",
saveConfigServer: "Simpan Konfigurasi",

View File

@@ -223,8 +223,6 @@ window.translations.pt = {
maxConsecutiveFailuresDesc: "Número de falhas consecutivas antes de aplicar resfriamento estendido.",
extendedCooldown: "Resfriamento Estendido",
extendedCooldownDesc: "Duração do resfriamento aplicado após atingir máx. de falhas consecutivas.",
capacityRetryDelay: "Atraso de Retry de Capacidade",
capacityRetryDelayDesc: "Atraso antes de tentar novamente quando capacidade do modelo está esgotada (não quota).",
maxCapacityRetries: "Máx. Retries de Capacidade",
maxCapacityRetriesDesc: "Máximo de retries para esgotamento de capacidade antes de trocar conta.",
saveConfigServer: "Salvar Configuração",

View File

@@ -227,8 +227,6 @@ window.translations.tr = {
maxConsecutiveFailuresDesc: "Uzatılmış soğuma uygulamadan önce ardışık başarısızlık sayısı.",
extendedCooldown: "Uzatılmış Soğuma",
extendedCooldownDesc: "Maks. ardışık başarısızlık sonrası uygulanan soğuma süresi.",
capacityRetryDelay: "Kapasite Yeniden Deneme Gecikmesi",
capacityRetryDelayDesc: "Model kapasitesi tükendiğinde (kota değil) yeniden denemeden önceki gecikme.",
maxCapacityRetries: "Maks. Kapasite Yeniden Denemesi",
maxCapacityRetriesDesc: "Hesap değiştirmeden önce kapasite tükenmesi için maksimum yeniden deneme.",
saveConfigServer: "Yapılandırmayı Kaydet",

View File

@@ -245,8 +245,6 @@ window.translations.zh = {
maxConsecutiveFailuresDesc: "触发扩展冷却前允许的连续失败次数。",
extendedCooldown: "扩展冷却时间",
extendedCooldownDesc: "达到最大连续失败后应用的冷却时长。",
capacityRetryDelay: "容量重试延迟",
capacityRetryDelayDesc: "模型容量耗尽(非配额)时重试前的延迟。",
maxCapacityRetries: "最大容量重试次数",
maxCapacityRetriesDesc: "容量耗尽时在切换账号前的最大重试次数。",
saveConfigServer: "保存配置",

View File

@@ -1226,47 +1226,23 @@
x-text="$store.global.t('extendedCooldownDesc')">Applied after max consecutive failures.</p>
</div>
<div class="form-control">
<label class="label pt-0">
<span class="label-text text-gray-400 text-xs"
x-text="$store.global.t('capacityRetryDelay')">Capacity Retry Delay</span>
<span class="label-text-alt font-mono text-neon-cyan text-xs font-semibold"
x-text="Math.round((serverConfig.capacityRetryDelayMs || 2000) / 1000) + 's'"></span>
</label>
<div class="flex gap-3 items-center">
<input type="range" min="500" max="10000" step="500"
class="custom-range custom-range-cyan flex-1"
:value="serverConfig.capacityRetryDelayMs || 2000"
:style="`background-size: ${((serverConfig.capacityRetryDelayMs || 2000) - 500) / 95}% 100%`"
@input="toggleCapacityRetryDelayMs($event.target.value)"
aria-label="Capacity retry delay slider">
<input type="number" min="500" max="10000" step="500"
class="input input-xs input-bordered w-20 bg-space-800 border-space-border text-white font-mono text-center"
:value="serverConfig.capacityRetryDelayMs || 2000"
@change="toggleCapacityRetryDelayMs($event.target.value)"
aria-label="Capacity retry delay value">
</div>
<p class="text-[9px] text-gray-600 mt-1 leading-tight"
x-text="$store.global.t('capacityRetryDelayDesc')">Delay for capacity (not quota) issues.</p>
</div>
<div class="form-control">
<label class="label pt-0">
<span class="label-text text-gray-400 text-xs"
x-text="$store.global.t('maxCapacityRetries')">Max Capacity Retries</span>
<span class="label-text-alt font-mono text-neon-cyan text-xs font-semibold"
x-text="serverConfig.maxCapacityRetries || 3"></span>
x-text="serverConfig.maxCapacityRetries || 5"></span>
</label>
<div class="flex gap-3 items-center">
<input type="range" min="1" max="10" step="1"
class="custom-range custom-range-cyan flex-1"
:value="serverConfig.maxCapacityRetries || 3"
:style="`background-size: ${((serverConfig.maxCapacityRetries || 3) - 1) / 0.09}% 100%`"
:value="serverConfig.maxCapacityRetries || 5"
:style="`background-size: ${((serverConfig.maxCapacityRetries || 5) - 1) / 0.09}% 100%`"
@input="toggleMaxCapacityRetries($event.target.value)"
aria-label="Max capacity retries slider">
<input type="number" min="1" max="10" step="1"
class="input input-xs input-bordered w-16 bg-space-800 border-space-border text-white font-mono text-center"
:value="serverConfig.maxCapacityRetries || 3"
:value="serverConfig.maxCapacityRetries || 5"
@change="toggleMaxCapacityRetries($event.target.value)"
aria-label="Max capacity retries value">
</div>

View File

@@ -15,7 +15,15 @@ import {
markRateLimited as markLimited,
markInvalid as markAccountInvalid,
getMinWaitTimeMs as getMinWait,
getRateLimitInfo as getLimitInfo
getRateLimitInfo as getLimitInfo,
getConsecutiveFailures as getFailures,
resetConsecutiveFailures as resetFailures,
incrementConsecutiveFailures as incrementFailures,
markAccountCoolingDown as markCoolingDown,
isAccountCoolingDown as checkCoolingDown,
clearAccountCooldown as clearCooldown,
getCooldownRemaining as getCooldownMs,
CooldownReason
} from './rate-limits.js';
import {
getTokenForAccount as fetchToken,
@@ -182,6 +190,10 @@ export class AccountManager {
if (this.#strategy) {
this.#strategy.onSuccess(account, modelId);
}
// Reset consecutive failures on success (matches opencode-antigravity-auth)
if (account?.email) {
resetFailures(this.#accounts, account.email);
}
}
/**
@@ -206,6 +218,26 @@ export class AccountManager {
}
}
/**
* Get the consecutive failure count for an account
* Used for progressive backoff calculation
* @param {string} email - Account email
* @returns {number} Number of consecutive failures
*/
getConsecutiveFailures(email) {
return getFailures(this.#accounts, email);
}
/**
* Increment the consecutive failure count without marking as rate limited
* Used for quick retries to track failures while staying on same account
* @param {string} email - Account email
* @returns {number} New consecutive failure count
*/
incrementConsecutiveFailures(email) {
return incrementFailures(this.#accounts, email);
}
/**
* Get the current strategy name
* @returns {string} Strategy name
@@ -275,6 +307,52 @@ export class AccountManager {
return getLimitInfo(this.#accounts, email, modelId);
}
// ============================================================================
// Cooldown Methods (matches opencode-antigravity-auth)
// ============================================================================
/**
* Mark an account as cooling down for a specified duration
* Used for temporary backoff separate from rate limits
* @param {string} email - Email of the account
* @param {number} cooldownMs - Duration of cooldown in milliseconds
* @param {string} [reason] - Reason for the cooldown (use CooldownReason constants)
*/
markAccountCoolingDown(email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
markCoolingDown(this.#accounts, email, cooldownMs, reason);
}
/**
* Check if an account is currently cooling down
* @param {string} email - Email of the account
* @returns {boolean} True if account is cooling down
*/
isAccountCoolingDown(email) {
const account = this.#accounts.find(a => a.email === email);
return account ? checkCoolingDown(account) : false;
}
/**
* Clear the cooldown for an account
* @param {string} email - Email of the account
*/
clearAccountCooldown(email) {
const account = this.#accounts.find(a => a.email === email);
if (account) {
clearCooldown(account);
}
}
/**
* Get time remaining until cooldown expires for an account
* @param {string} email - Email of the account
* @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
*/
getCooldownRemaining(email) {
const account = this.#accounts.find(a => a.email === email);
return account ? getCooldownMs(account) : 0;
}
/**
* Get OAuth token for an account
* @param {Object} account - Account object with email and credentials
@@ -378,4 +456,7 @@ export class AccountManager {
}
}
// Re-export CooldownReason for use by handlers
export { CooldownReason };
export default AccountManager;

View File

@@ -133,6 +133,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
actualResetMs: actualResetMs // Original duration from API
};
// Track consecutive failures for progressive backoff (matches opencode-antigravity-auth)
account.consecutiveFailures = (account.consecutiveFailures || 0) + 1;
// Log appropriately based on duration
if (actualResetMs > DEFAULT_COOLDOWN_MS) {
logger.warn(
@@ -235,3 +238,132 @@ export function getRateLimitInfo(accounts, email, modelId) {
waitMs
};
}
/**
 * Look up how many consecutive failures an account has accumulated.
 * The count feeds progressive backoff tiers (matches opencode-antigravity-auth).
 *
 * @param {Array} accounts - Array of account objects
 * @param {string} email - Email of the account
 * @returns {number} Consecutive failure count (0 when the account is unknown)
 */
export function getConsecutiveFailures(accounts, email) {
for (const candidate of accounts) {
if (candidate.email === email) {
return candidate.consecutiveFailures || 0;
}
}
return 0;
}
/**
 * Zero out an account's consecutive failure counter.
 * Invoked after a successful request (matches opencode-antigravity-auth).
 *
 * @param {Array} accounts - Array of account objects
 * @param {string} email - Email of the account
 * @returns {boolean} True if the account was found and its counter reset
 */
export function resetConsecutiveFailures(accounts, email) {
const match = accounts.find((entry) => entry.email === email);
if (match) {
match.consecutiveFailures = 0;
return true;
}
return false;
}
/**
 * Add one to an account's consecutive failure counter WITHOUT marking it
 * rate limited. Used for quick retries where the failure should be tracked
 * but the account should stay selectable (matches opencode-antigravity-auth
 * behavior of always incrementing on 429).
 *
 * @param {Array} accounts - Array of account objects
 * @param {string} email - Email of the account
 * @returns {number} New consecutive failure count (0 when account unknown)
 */
export function incrementConsecutiveFailures(accounts, email) {
const match = accounts.find((entry) => entry.email === email);
if (!match) {
return 0;
}
const next = (match.consecutiveFailures || 0) + 1;
match.consecutiveFailures = next;
return next;
}
// ============================================================================
// Cooldown Mechanism (matches opencode-antigravity-auth)
// Separate from rate limits - used for temporary backoff after failures
// ============================================================================
/**
 * Cooldown reasons for debugging/logging.
 * Frozen so this shared constant object cannot be mutated at runtime
 * (it is exported and re-exported across modules).
 */
export const CooldownReason = Object.freeze({
RATE_LIMIT: 'rate_limit',
AUTH_FAILURE: 'auth_failure',
CONSECUTIVE_FAILURES: 'consecutive_failures',
SERVER_ERROR: 'server_error'
});
/**
 * Start a cooldown window for an account.
 * Cooldowns provide temporary backoff that is tracked separately from
 * API-reported rate limits.
 *
 * @param {Array} accounts - Array of account objects
 * @param {string} email - Email of the account
 * @param {number} cooldownMs - Duration of cooldown in milliseconds
 * @param {string} [reason] - Reason for the cooldown
 * @returns {boolean} True if the account was found and marked
 */
export function markAccountCoolingDown(accounts, email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
const match = accounts.find((entry) => entry.email === email);
if (!match) {
return false;
}
match.coolingDownUntil = Date.now() + cooldownMs;
match.cooldownReason = reason;
logger.debug(`[AccountManager] Account ${email} cooling down for ${formatDuration(cooldownMs)} (reason: ${reason})`);
return true;
}
/**
 * Check whether an account is inside an active cooldown window.
 * An expired cooldown is removed from the account as a side effect.
 *
 * @param {Object} account - Account object
 * @returns {boolean} True if account is cooling down
 */
export function isAccountCoolingDown(account) {
if (!account || account.coolingDownUntil === undefined) {
return false;
}
if (Date.now() < account.coolingDownUntil) {
return true;
}
// Cooldown window has passed - drop the stale markers (inlined clear).
delete account.coolingDownUntil;
delete account.cooldownReason;
return false;
}
/**
 * Remove the cooldown markers from an account.
 * Safe to call with a null/undefined account.
 *
 * @param {Object} account - Account object
 */
export function clearAccountCooldown(account) {
if (!account) {
return;
}
delete account.coolingDownUntil;
delete account.cooldownReason;
}
/**
 * Milliseconds left before an account's cooldown expires.
 *
 * @param {Object} account - Account object
 * @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
 */
export function getCooldownRemaining(account) {
const until = account?.coolingDownUntil;
if (until === undefined) {
return 0;
}
return Math.max(0, until - Date.now());
}

View File

@@ -5,6 +5,8 @@
* All strategies must implement the selectAccount method.
*/
import { isAccountCoolingDown } from '../rate-limits.js';
/**
* @typedef {Object} SelectionResult
* @property {Object|null} account - The selected account or null if none available
@@ -77,6 +79,9 @@ export class BaseStrategy {
// Skip disabled accounts
if (account.enabled === false) return false;
// Check if account is cooling down (matches opencode-antigravity-auth)
if (isAccountCoolingDown(account)) return false;
// Check model-specific rate limit
if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
const limit = account.modelRateLimits[modelId];

View File

@@ -65,7 +65,7 @@ export class HybridStrategy extends BaseStrategy {
}
// Get candidates that pass all filters
const candidates = this.#getCandidates(accounts, modelId);
const { candidates, fallbackLevel } = this.#getCandidates(accounts, modelId);
if (candidates.length === 0) {
// Diagnose why no candidates are available and compute wait time
@@ -87,16 +87,30 @@ export class HybridStrategy extends BaseStrategy {
const best = scored[0];
best.account.lastUsed = Date.now();
// Consume a token from the bucket
this.#tokenBucketTracker.consume(best.account.email);
// Consume a token from the bucket (unless in lastResort mode where we bypassed token check)
if (fallbackLevel !== 'lastResort') {
this.#tokenBucketTracker.consume(best.account.email);
}
if (onSave) onSave();
// Calculate throttle wait time based on fallback level
// This prevents overwhelming the API when all accounts are stressed
let waitMs = 0;
if (fallbackLevel === 'lastResort') {
// All accounts exhausted - add significant delay to allow rate limits to clear
waitMs = 500;
} else if (fallbackLevel === 'emergency') {
// All accounts unhealthy - add moderate delay
waitMs = 250;
}
const position = best.index + 1;
const total = accounts.length;
logger.info(`[HybridStrategy] Using account: ${best.account.email} (${position}/${total}, score: ${best.score.toFixed(1)})`);
const fallbackInfo = fallbackLevel !== 'normal' ? `, fallback: ${fallbackLevel}` : '';
logger.info(`[HybridStrategy] Using account: ${best.account.email} (${position}/${total}, score: ${best.score.toFixed(1)}${fallbackInfo})`);
return { account: best.account, index: best.index, waitMs: 0 };
return { account: best.account, index: best.index, waitMs };
}
/**
@@ -131,6 +145,8 @@ export class HybridStrategy extends BaseStrategy {
/**
* Get candidates that pass all filters
* @private
* @returns {{candidates: Array, fallbackLevel: string}} Candidates and fallback level used
* fallbackLevel: 'normal' | 'quota' | 'emergency' | 'lastResort'
*/
#getCandidates(accounts, modelId) {
const candidates = accounts
@@ -160,24 +176,56 @@ export class HybridStrategy extends BaseStrategy {
return true;
});
// If no candidates after quota filter, fall back to all usable accounts
// (better to use critical quota than fail entirely)
if (candidates.length === 0) {
const fallback = accounts
.map((account, index) => ({ account, index }))
.filter(({ account }) => {
if (!this.isAccountUsable(account, modelId)) return false;
if (!this.#healthTracker.isUsable(account.email)) return false;
if (!this.#tokenBucketTracker.hasTokens(account.email)) return false;
return true;
});
if (fallback.length > 0) {
logger.warn('[HybridStrategy] All accounts have critical quota, using fallback');
return fallback;
}
if (candidates.length > 0) {
return { candidates, fallbackLevel: 'normal' };
}
return candidates;
// If no candidates after quota filter, fall back to all usable accounts
// (better to use critical quota than fail entirely)
const fallback = accounts
.map((account, index) => ({ account, index }))
.filter(({ account }) => {
if (!this.isAccountUsable(account, modelId)) return false;
if (!this.#healthTracker.isUsable(account.email)) return false;
if (!this.#tokenBucketTracker.hasTokens(account.email)) return false;
return true;
});
if (fallback.length > 0) {
logger.warn('[HybridStrategy] All accounts have critical quota, using fallback');
return { candidates: fallback, fallbackLevel: 'quota' };
}
// Emergency fallback: bypass health check when ALL accounts are unhealthy
// This prevents "Max retries exceeded" when health scores are too low
const emergency = accounts
.map((account, index) => ({ account, index }))
.filter(({ account }) => {
if (!this.isAccountUsable(account, modelId)) return false;
if (!this.#tokenBucketTracker.hasTokens(account.email)) return false;
// Skip health check - use "least bad" account
return true;
});
if (emergency.length > 0) {
logger.warn('[HybridStrategy] EMERGENCY: All accounts unhealthy, using least bad account');
return { candidates: emergency, fallbackLevel: 'emergency' };
}
// Last resort: bypass BOTH health AND token bucket checks
// Only check basic usability (not rate-limited, not disabled)
const lastResort = accounts
.map((account, index) => ({ account, index }))
.filter(({ account }) => {
// Only check if account is usable (not rate-limited, not disabled)
if (!this.isAccountUsable(account, modelId)) return false;
// Skip health and token bucket checks entirely
return true;
});
if (lastResort.length > 0) {
logger.warn('[HybridStrategy] LAST RESORT: All accounts exhausted, using any usable account');
return { candidates: lastResort, fallbackLevel: 'lastResort' };
}
return { candidates: [], fallbackLevel: 'normal' };
}
/**
@@ -202,11 +250,11 @@ export class HybridStrategy extends BaseStrategy {
const quotaComponent = quotaScore * this.#weights.quota;
// LRU component (older = higher score)
// Use time since last use, capped at 1 hour for scoring
// Use time since last use in seconds, capped at 1 hour (matches opencode-antigravity-auth)
const lastUsed = account.lastUsed || 0;
const timeSinceLastUse = Math.min(Date.now() - lastUsed, 3600000); // Cap at 1 hour
const lruMinutes = timeSinceLastUse / 60000;
const lruComponent = lruMinutes * this.#weights.lru;
const lruSeconds = timeSinceLastUse / 1000;
const lruComponent = lruSeconds * this.#weights.lru; // 0-3600 * 0.1 = 0-360 max
return healthComponent + tokenComponent + quotaComponent + lruComponent;
}

View File

@@ -12,7 +12,7 @@ const DEFAULT_CONFIG = {
successReward: 1, // Points on successful request
rateLimitPenalty: -10, // Points on rate limit
failurePenalty: -20, // Points on other failures
recoveryPerHour: 2, // Passive recovery rate
recoveryPerHour: 10, // Passive recovery rate (increased from 2 for faster recovery)
minUsable: 50, // Minimum score to be selected
maxScore: 100 // Maximum score cap
};

View File

@@ -11,63 +11,92 @@ import {
MAX_WAIT_BEFORE_ERROR_MS,
DEFAULT_COOLDOWN_MS,
RATE_LIMIT_DEDUP_WINDOW_MS,
RATE_LIMIT_STATE_RESET_MS,
FIRST_RETRY_DELAY_MS,
SWITCH_ACCOUNT_DELAY_MS,
MAX_CONSECUTIVE_FAILURES,
EXTENDED_COOLDOWN_MS,
CAPACITY_RETRY_DELAY_MS,
CAPACITY_BACKOFF_TIERS_MS,
MAX_CAPACITY_RETRIES,
BACKOFF_BY_ERROR_TYPE,
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
MIN_BACKOFF_MS,
isThinkingModel
} from '../constants.js';
import { convertGoogleToAnthropic } from '../format/index.js';
import { isRateLimitError, isAuthError } from '../errors.js';
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
import { logger } from '../utils/logger.js';
import { parseResetTime } from './rate-limit-parser.js';
import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
import { parseThinkingSSEResponse } from './sse-parser.js';
import { getFallbackModel } from '../fallback-config.js';
/**
* Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits
* Tracks last rate limit timestamp per model to skip duplicate retries
* Rate limit deduplication - prevents thundering herd on concurrent rate limits.
* Tracks rate limit state per account+model including consecutive429 count and timestamps.
*/
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
/**
* Check if we should skip retry due to recent rate limit on this model
* Get deduplication key for rate limit tracking
* @param {string} email - Account email
* @param {string} model - Model ID
* @returns {boolean} True if retry should be skipped (within dedup window)
* @returns {string} Dedup key
*/
function shouldSkipRetryDueToDedup(model) {
const lastTimestamp = lastRateLimitTimestamps.get(model);
if (!lastTimestamp) return false;
// Compose the `${email}:${model}` key used for per-account+model rate limit state.
function getDedupKey(email, model) {
return [email, model].join(':');
}
const elapsed = Date.now() - lastTimestamp;
if (elapsed < RATE_LIMIT_DEDUP_WINDOW_MS) {
logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${elapsed}ms ago), skipping retry`);
return true;
/**
* Get rate limit backoff with deduplication and exponential backoff (matches opencode-antigravity-auth)
* @param {string} email - Account email
* @param {string} model - Model ID
* @param {number|null} serverRetryAfterMs - Server-provided retry time
* @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
*/
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
const now = Date.now();
const stateKey = getDedupKey(email, model);
const previous = rateLimitStateByAccountModel.get(stateKey);
// Check if within dedup window - return duplicate status
if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000);
logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true };
}
return false;
// Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
? previous.consecutive429 + 1
: 1;
// Update state
rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
// Calculate exponential backoff
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000);
logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
}
/**
* Record rate limit timestamp for deduplication
* Clear rate limit state after successful request
* @param {string} email - Account email
* @param {string} model - Model ID
*/
function recordRateLimitTimestamp(model) {
lastRateLimitTimestamps.set(model, Date.now());
// Drop stored 429 backoff state for this account+model after a successful request.
function clearRateLimitState(email, model) {
rateLimitStateByAccountModel.delete(getDedupKey(email, model));
}
/**
* Clear rate limit timestamp after successful retry
* @param {string} model - Model ID
*/
function clearRateLimitTimestamp(model) {
lastRateLimitTimestamps.delete(model);
}
/**
* Gap 3: Detect permanent authentication failures that require re-authentication
* These should mark the account as invalid rather than just clearing cache
* Detect permanent authentication failures that require re-authentication.
* These should mark the account as invalid rather than just clearing cache.
* @param {string} errorText - Error message from API
* @returns {boolean} True if permanent auth failure
*/
@@ -82,8 +111,8 @@ function isPermanentAuthFailure(errorText) {
}
/**
* Gap 4: Detect if 429 error is due to model capacity (not user quota)
* Capacity issues should retry on same account with shorter delay
* Detect if 429 error is due to model capacity (not user quota).
* Capacity issues should retry on same account with shorter delay.
* @param {string} errorText - Error message from API
* @returns {boolean} True if capacity exhausted (not quota)
*/
@@ -95,16 +124,47 @@ function isModelCapacityExhausted(errorText) {
lower.includes('service temporarily unavailable');
}
// Periodically clean up stale dedup timestamps (every 60 seconds)
// Periodically clean up stale rate limit state (every 60 seconds)
setInterval(() => {
const cutoff = Date.now() - 60000; // 1 minute
for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
if (timestamp < cutoff) {
lastRateLimitTimestamps.delete(model);
const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
for (const [key, state] of rateLimitStateByAccountModel.entries()) {
if (state.lastAt < cutoff) {
rateLimitStateByAccountModel.delete(key);
}
}
}, 60000);
/**
 * Calculate smart backoff based on error type (matches opencode-antigravity-auth).
 * Prefers the server-provided reset time when available; otherwise maps the
 * parsed rate limit reason onto per-error-type backoff constants.
 * @param {string} errorText - Error message
 * @param {number|null} serverResetMs - Reset time from server
 * @param {number} consecutiveFailures - Number of consecutive failures
 * @returns {number} Backoff time in milliseconds
 */
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
// If server provides a reset time, use it (with minimum floor to prevent
// "Available in 0s" retry loops)
if (serverResetMs && serverResetMs > 0) {
return Math.max(serverResetMs, MIN_BACKOFF_MS);
}
const reason = parseRateLimitReason(errorText);
switch (reason) {
case 'QUOTA_EXHAUSTED': {
// Progressive backoff: [60s, 5m, 30m, 2h]. Block braces scope the
// declaration to this case (avoids no-case-declarations leakage).
const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1);
return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
}
case 'RATE_LIMIT_EXCEEDED':
return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
case 'MODEL_CAPACITY_EXHAUSTED':
return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
case 'SERVER_ERROR':
return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
default:
return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
}
}
/**
* Send a non-streaming request to Cloud Code with multi-account support
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
@@ -174,7 +234,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
// Select account using configured strategy
const { account, waitMs } = accountManager.selectAccount(model);
// If strategy returns a wait time, sleep and retry
// If strategy returns a wait time without an account, sleep and retry
if (!account && waitMs > 0) {
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
await sleep(waitMs + 500);
@@ -182,6 +242,13 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
continue;
}
// If strategy returns an account with throttle wait (fallback mode), apply delay
// This prevents overwhelming the API when using emergency/lastResort fallbacks
if (account && waitMs > 0) {
logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
await sleep(waitMs);
}
if (!account) {
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
continue;
@@ -197,8 +264,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
// Try each endpoint with index-based loop for capacity retry support
let lastError = null;
let retriedOnce = false; // Track if we've already retried for short rate limit
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
let capacityRetryCount = 0;
let endpointIndex = 0;
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
@@ -219,7 +285,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
if (response.status === 401) {
// Gap 3: Check for permanent auth failures
// Check for permanent auth failures
if (isPermanentAuthFailure(errorText)) {
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
@@ -236,12 +302,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
if (response.status === 429) {
const resetMs = parseResetTime(response, errorText);
const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
// Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
if (isModelCapacityExhausted(errorText)) {
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
// Progressive capacity backoff tiers
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
capacityRetryCount++;
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
// Track failures for progressive backoff escalation (matches opencode-antigravity-auth)
accountManager.incrementConsecutiveFailures(account.email);
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
@@ -251,39 +322,80 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
}
// Gap 1: Check deduplication window to prevent thundering herd
if (shouldSkipRetryDueToDedup(model)) {
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
// Get rate limit backoff with exponential backoff and state reset
const backoff = getRateLimitBackoff(account.email, model, resetMs);
// For very short rate limits (< 1 second), always wait and retry
// Switching accounts won't help when all accounts have per-second rate limits
if (resetMs !== null && resetMs < 1000) {
const waitMs = resetMs;
logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
// If within dedup window AND reset time is >= 1s, switch account
if (backoff.isDuplicate) {
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
accountManager.markRateLimited(account.email, smartBackoffMs, model);
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
}
// Calculate smart backoff based on error type
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
// Decision: wait and retry OR switch account
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
// Long-term quota exhaustion (> 10s) - switch to next account
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
accountManager.markRateLimited(account.email, resetMs, model);
// First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
// Quick 1s retry on first 429 (matches opencode-antigravity-auth)
const waitMs = backoff.delayMs;
// markRateLimited already increments consecutiveFailures internally
// This prevents concurrent retry storms and ensures progressive backoff escalation
accountManager.markRateLimited(account.email, waitMs, model);
logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
} else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
// Long-term quota exhaustion (> 10s) - wait SWITCH_ACCOUNT_DELAY_MS then switch
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
await sleep(SWITCH_ACCOUNT_DELAY_MS);
accountManager.markRateLimited(account.email, smartBackoffMs, model);
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
} else {
// Short-term rate limit (<= 10s) - wait and retry once
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
if (!retriedOnce) {
retriedOnce = true;
recordRateLimitTimestamp(model); // Gap 1: Record before retry
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
} else {
// Already retried once, mark and switch
accountManager.markRateLimited(account.email, waitMs, model);
throw new Error(`RATE_LIMITED: ${errorText}`);
}
// Short-term rate limit but not first attempt - use exponential backoff delay
const waitMs = backoff.delayMs;
// markRateLimited already increments consecutiveFailures internally
accountManager.markRateLimited(account.email, waitMs, model);
logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
}
if (response.status >= 400) {
// Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
if (response.status === 503 && isModelCapacityExhausted(errorText)) {
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
// Progressive capacity backoff tiers (same as 429 capacity handling)
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
capacityRetryCount++;
accountManager.incrementConsecutiveFailures(account.email);
logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
// Max capacity retries exceeded - switch account
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
}
lastError = new Error(`API error ${response.status}: ${errorText}`);
// Try next endpoint for 403/404/5xx errors (matches opencode-antigravity-auth behavior)
if (response.status === 403 || response.status === 404) {
@@ -300,8 +412,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
// For thinking models, parse SSE and accumulate all parts
if (isThinking) {
const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
// Gap 1: Clear timestamp on success
clearRateLimitTimestamp(model);
// Clear rate limit state on success
clearRateLimitState(account.email, model);
accountManager.notifySuccess(account, model);
return result;
}
@@ -309,8 +421,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
// Non-thinking models use regular JSON
const data = await response.json();
logger.debug('[CloudCode] Response received');
// Gap 1: Clear timestamp on success
clearRateLimitTimestamp(model);
// Clear rate limit state on success
clearRateLimitState(account.email, model);
accountManager.notifySuccess(account, model);
return convertGoogleToAnthropic(data, anthropicRequest.model);
@@ -350,13 +462,15 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
accountManager.notifyFailure(account, model);
// Gap 2: Check consecutive failures for extended cooldown
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
// Track 5xx errors for extended cooldown
// Note: markRateLimited already increments consecutiveFailures internally
const currentFailures = accountManager.getConsecutiveFailures(account.email);
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
} else {
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
accountManager.incrementConsecutiveFailures(account.email);
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
}
continue;
}
@@ -364,13 +478,15 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
if (isNetworkError(error)) {
accountManager.notifyFailure(account, model);
// Gap 2: Check consecutive failures for extended cooldown
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
// Track network errors for extended cooldown
// Note: markRateLimited already increments consecutiveFailures internally
const currentFailures = accountManager.getConsecutiveFailures(account.email);
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
} else {
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
accountManager.incrementConsecutiveFailures(account.email);
logger.warn(`[CloudCode] Network error for ${account.email} (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
}
await sleep(1000);
continue;

View File

@@ -167,15 +167,69 @@ export function parseResetTime(responseOrError, errorText = '') {
}
}
// SANITY CHECK: Enforce strict minimums for found rate limits
// If we found a reset time, but it's very small (e.g. < 1s) or negative,
// explicitly bump it up to avoid "Available in 0s" loops.
// SANITY CHECK: Handle very small or negative reset times
// For sub-second rate limits (common with per-second quotas), add a small buffer
// For negative or zero, use a reasonable minimum
if (resetMs !== null) {
if (resetMs < 1000) {
logger.debug(`[CloudCode] Reset time too small (${resetMs}ms), enforcing 2s buffer`);
resetMs = 2000;
if (resetMs <= 0) {
logger.debug(`[CloudCode] Reset time invalid (${resetMs}ms), using 500ms default`);
resetMs = 500;
} else if (resetMs < 500) {
// Very short reset - add 200ms buffer for network latency
logger.debug(`[CloudCode] Short reset time (${resetMs}ms), adding 200ms buffer`);
resetMs = resetMs + 200;
}
// Note: No longer enforcing 2s minimum - this was causing cascading failures
// when all accounts had short rate limits simultaneously
}
return resetMs;
}
/**
 * Classify a rate-limit/error response body into a backoff category.
 * Used for smart backoff by error type (matches opencode-antigravity-auth).
 *
 * @param {string} errorText - Error message/body text
 * @returns {'RATE_LIMIT_EXCEEDED' | 'QUOTA_EXHAUSTED' | 'MODEL_CAPACITY_EXHAUSTED' | 'SERVER_ERROR' | 'UNKNOWN'} Error reason
 */
export function parseRateLimitReason(errorText) {
  const text = (errorText || '').toLowerCase();
  // Categories are checked in priority order: quota exhaustion first (longest
  // cooldowns), then transient capacity issues, then generic rate limiting,
  // then server-side errors. The first category with a matching marker wins.
  const classifiers = [
    ['QUOTA_EXHAUSTED', [
      'quota_exhausted', 'quotaresetdelay', 'quotaresettimestamp',
      'resource_exhausted', 'daily limit', 'quota exceeded',
    ]],
    ['MODEL_CAPACITY_EXHAUSTED', [
      'model_capacity_exhausted', 'capacity_exhausted',
      'model is currently overloaded', 'service temporarily unavailable',
    ]],
    ['RATE_LIMIT_EXCEEDED', [
      'rate_limit_exceeded', 'rate limit', 'too many requests', 'throttl',
    ]],
    ['SERVER_ERROR', [
      'internal server error', 'server error', '503', '502', '504',
    ]],
  ];
  for (const [reason, markers] of classifiers) {
    if (markers.some((marker) => text.includes(marker))) {
      return reason;
    }
  }
  return 'UNKNOWN';
}

View File

@@ -12,61 +12,90 @@ import {
MAX_WAIT_BEFORE_ERROR_MS,
DEFAULT_COOLDOWN_MS,
RATE_LIMIT_DEDUP_WINDOW_MS,
RATE_LIMIT_STATE_RESET_MS,
FIRST_RETRY_DELAY_MS,
SWITCH_ACCOUNT_DELAY_MS,
MAX_CONSECUTIVE_FAILURES,
EXTENDED_COOLDOWN_MS,
CAPACITY_RETRY_DELAY_MS,
MAX_CAPACITY_RETRIES
CAPACITY_BACKOFF_TIERS_MS,
MAX_CAPACITY_RETRIES,
BACKOFF_BY_ERROR_TYPE,
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
MIN_BACKOFF_MS
} from '../constants.js';
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
import { logger } from '../utils/logger.js';
import { parseResetTime } from './rate-limit-parser.js';
import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
import { streamSSEResponse } from './sse-streamer.js';
import { getFallbackModel } from '../fallback-config.js';
import crypto from 'crypto';
/**
* Rate limit deduplication - prevents thundering herd on concurrent rate limits.
* Tracks rate limit state per account+model including consecutive429 count and timestamps.
*/
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
/**
* Get deduplication key for rate limit tracking
* @param {string} email - Account email
* @param {string} model - Model ID
* @returns {string} Dedup key
*/
function getDedupKey(email, model) {
  // Rate limit state is tracked per account AND per model, so the key
  // combines both.
  return `${email}:${model}`;
}
/**
* Get rate limit backoff with deduplication and exponential backoff (matches opencode-antigravity-auth)
* @param {string} email - Account email
* @param {string} model - Model ID
* @param {number|null} serverRetryAfterMs - Server-provided retry time
* @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
*/
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
  const now = Date.now();
  const stateKey = getDedupKey(email, model);
  const previous = rateLimitStateByAccountModel.get(stateKey);
  // Within the dedup window: report a duplicate hit WITHOUT bumping the
  // counter, so concurrent 429s on the same account+model don't escalate
  // the backoff multiple times for one burst.
  if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
    const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
    const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000);
    logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
    return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true };
  }
  // Determine attempt number - reset to 1 after RATE_LIMIT_STATE_RESET_MS of
  // inactivity, otherwise continue escalating.
  const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
    ? previous.consecutive429 + 1
    : 1;
  // Record the new state before returning so parallel callers see it.
  rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
  // Exponential backoff: base * 2^(attempt-1), capped at 60s, never below base.
  const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
  const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000);
  logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
  return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
}
/**
* Clear rate limit state after successful request
* @param {string} email - Account email
* @param {string} model - Model ID
*/
function clearRateLimitState(email, model) {
  // Forget prior 429 history for this account+model so the next rate limit
  // starts again from attempt 1.
  rateLimitStateByAccountModel.delete(getDedupKey(email, model));
}
/**
 * Clear rate limit timestamp after a successful retry.
 *
 * NOTE(review): this helper operates on `lastRateLimitTimestamps`, which the
 * per-account+model state (`rateLimitStateByAccountModel` via
 * `clearRateLimitState`) appears to supersede - confirm no callers remain
 * and remove this dead code.
 * @param {string} model - Model ID
 */
function clearRateLimitTimestamp(model) {
lastRateLimitTimestamps.delete(model);
}
/**
* Detect permanent authentication failures that require re-authentication.
* @param {string} errorText - Error message from API
* @returns {boolean} True if permanent auth failure
*/
@@ -81,7 +110,7 @@ function isPermanentAuthFailure(errorText) {
}
/**
* Detect if 429 error is due to model capacity (not user quota).
* @param {string} errorText - Error message from API
* @returns {boolean} True if capacity exhausted (not quota)
*/
@@ -93,16 +122,47 @@ function isModelCapacityExhausted(errorText) {
lower.includes('service temporarily unavailable');
}
// Periodically clean up stale rate limit state (every 60 seconds) so
// long-idle account+model entries don't accumulate in the Map.
const rateLimitCleanupTimer = setInterval(() => {
  const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
  for (const [key, state] of rateLimitStateByAccountModel.entries()) {
    if (state.lastAt < cutoff) {
      rateLimitStateByAccountModel.delete(key);
    }
  }
}, 60000);
// Don't let the cleanup timer keep the Node process alive on shutdown
// (optional call - no-op in runtimes whose timers lack unref).
rateLimitCleanupTimer.unref?.();
/**
* Calculate smart backoff based on error type (matches opencode-antigravity-auth)
* @param {string} errorText - Error message
* @param {number|null} serverResetMs - Reset time from server
* @param {number} consecutiveFailures - Number of consecutive failures
* @returns {number} Backoff time in milliseconds
*/
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
  // Server-provided reset time wins, clamped to a minimum floor so a 0ms or
  // near-0ms reset cannot produce a tight retry loop.
  if (serverResetMs && serverResetMs > 0) {
    return Math.max(serverResetMs, MIN_BACKOFF_MS);
  }
  // No server hint - classify the error text and back off by error type.
  switch (parseRateLimitReason(errorText)) {
    case 'QUOTA_EXHAUSTED': {
      // Progressive tiers (e.g. [60s, 5m, 30m, 2h]) indexed by consecutive
      // failures; braces scope the lexical declaration to this case.
      const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1);
      return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
    }
    case 'RATE_LIMIT_EXCEEDED':
      return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
    case 'MODEL_CAPACITY_EXHAUSTED':
      return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
    case 'SERVER_ERROR':
      return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
    default:
      return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
  }
}
/**
* Send a streaming request to Cloud Code with multi-account support
* Streams events in real-time as they arrive from the server
@@ -172,7 +232,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
// Select account using configured strategy
const { account, waitMs } = accountManager.selectAccount(model);
// If strategy returns a wait time, sleep and retry
// If strategy returns a wait time without an account, sleep and retry
if (!account && waitMs > 0) {
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
await sleep(waitMs + 500);
@@ -180,6 +240,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
continue;
}
// If strategy returns an account with throttle wait (fallback mode), apply delay
// This prevents overwhelming the API when using emergency/lastResort fallbacks
if (account && waitMs > 0) {
logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
await sleep(waitMs);
}
if (!account) {
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
continue;
@@ -195,8 +262,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
// Try each endpoint with index-based loop for capacity retry support
let lastError = null;
let retriedOnce = false; // Track if we've already retried for short rate limit
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
let capacityRetryCount = 0;
let endpointIndex = 0;
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
@@ -215,7 +281,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
if (response.status === 401) {
// Gap 3: Check for permanent auth failures
// Check for permanent auth failures
if (isPermanentAuthFailure(errorText)) {
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
@@ -231,12 +297,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
if (response.status === 429) {
const resetMs = parseResetTime(response, errorText);
const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
// Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
if (isModelCapacityExhausted(errorText)) {
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
// Progressive capacity backoff tiers
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
capacityRetryCount++;
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
// Track failures for progressive backoff escalation (matches opencode-antigravity-auth)
accountManager.incrementConsecutiveFailures(account.email);
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
@@ -246,38 +317,78 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
}
// Gap 1: Check deduplication window to prevent thundering herd
if (shouldSkipRetryDueToDedup(model)) {
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
// Get rate limit backoff with exponential backoff and state reset
const backoff = getRateLimitBackoff(account.email, model, resetMs);
// For very short rate limits (< 1 second), always wait and retry
// Switching accounts won't help when all accounts have per-second rate limits
if (resetMs !== null && resetMs < 1000) {
const waitMs = resetMs;
logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
// If within dedup window AND reset time is >= 1s, switch account
if (backoff.isDuplicate) {
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
accountManager.markRateLimited(account.email, smartBackoffMs, model);
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
}
// Calculate smart backoff based on error type
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
// Decision: wait and retry OR switch account
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
// Long-term quota exhaustion (> 10s) - switch to next account
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
accountManager.markRateLimited(account.email, resetMs, model);
// First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
// Quick 1s retry on first 429 (matches opencode-antigravity-auth)
const waitMs = backoff.delayMs;
// markRateLimited already increments consecutiveFailures internally
accountManager.markRateLimited(account.email, waitMs, model);
logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
} else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
// Long-term quota exhaustion (> 10s) - wait SWITCH_ACCOUNT_DELAY_MS then switch
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
await sleep(SWITCH_ACCOUNT_DELAY_MS);
accountManager.markRateLimited(account.email, smartBackoffMs, model);
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
} else {
// Short-term rate limit (<= 10s) - wait and retry once
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
if (!retriedOnce) {
retriedOnce = true;
recordRateLimitTimestamp(model); // Gap 1: Record before retry
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
} else {
// Already retried once, mark and switch
accountManager.markRateLimited(account.email, waitMs, model);
throw new Error(`RATE_LIMITED: ${errorText}`);
}
// Short-term rate limit but not first attempt - use exponential backoff delay
const waitMs = backoff.delayMs;
// markRateLimited already increments consecutiveFailures internally
accountManager.markRateLimited(account.email, waitMs, model);
logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
}
// Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
if (response.status === 503 && isModelCapacityExhausted(errorText)) {
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
// Progressive capacity backoff tiers (same as 429 capacity handling)
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
capacityRetryCount++;
accountManager.incrementConsecutiveFailures(account.email);
logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
await sleep(waitMs);
// Don't increment endpointIndex - retry same endpoint
continue;
}
// Max capacity retries exceeded - switch account
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
}
lastError = new Error(`API error ${response.status}: ${errorText}`);
// Try next endpoint for 403/404/5xx errors (matches opencode-antigravity-auth behavior)
@@ -299,8 +410,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
try {
yield* streamSSEResponse(currentResponse, anthropicRequest.model);
logger.debug('[CloudCode] Stream completed');
// Gap 1: Clear timestamp on success
clearRateLimitTimestamp(model);
// Clear rate limit state on success
clearRateLimitState(account.email, model);
accountManager.notifySuccess(account, model);
return;
} catch (streamError) {
@@ -409,13 +520,15 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
accountManager.notifyFailure(account, model);
// Gap 2: Check consecutive failures for extended cooldown
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
// Track 5xx errors for extended cooldown
// Note: markRateLimited already increments consecutiveFailures internally
const currentFailures = accountManager.getConsecutiveFailures(account.email);
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
} else {
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
accountManager.incrementConsecutiveFailures(account.email);
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
}
continue;
}
@@ -423,13 +536,15 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
if (isNetworkError(error)) {
accountManager.notifyFailure(account, model);
// Gap 2: Check consecutive failures for extended cooldown
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
// Track network errors for extended cooldown
// Note: markRateLimited already increments consecutiveFailures internally
const currentFailures = accountManager.getConsecutiveFailures(account.email);
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
} else {
logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
accountManager.incrementConsecutiveFailures(account.email);
logger.warn(`[CloudCode] Network error for ${account.email} (stream) (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
}
await sleep(1000);
continue;

View File

@@ -16,6 +16,11 @@ const DEFAULT_CONFIG = {
defaultCooldownMs: 10000, // 10 seconds
maxWaitBeforeErrorMs: 120000, // 2 minutes
maxAccounts: 10, // Maximum number of accounts allowed
// Rate limit handling (matches opencode-antigravity-auth)
rateLimitDedupWindowMs: 2000, // 2 seconds - prevents concurrent retry storms
maxConsecutiveFailures: 3, // Before applying extended cooldown
extendedCooldownMs: 60000, // 1 minute extended cooldown
maxCapacityRetries: 5, // Max retries for capacity exhaustion
modelMapping: {},
// Account selection strategy configuration
accountSelection: {

View File

@@ -103,16 +103,33 @@ export const MAX_ACCOUNTS = config?.maxAccounts || 10; // From config or 10
// Rate limit wait thresholds
export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs || 120000; // From config or 2 minutes
// Gap 1: Retry deduplication - prevents thundering herd on concurrent rate limits
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 5000; // 5 seconds
// Retry deduplication - prevents thundering herd on concurrent rate limits
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 2000; // 2 seconds
export const RATE_LIMIT_STATE_RESET_MS = config?.rateLimitStateResetMs || 120000; // 2 minutes - reset consecutive429 after inactivity
export const FIRST_RETRY_DELAY_MS = config?.firstRetryDelayMs || 1000; // Quick 1s retry on first 429
export const SWITCH_ACCOUNT_DELAY_MS = config?.switchAccountDelayMs || 5000; // Delay before switching accounts
// Gap 2: Consecutive failure tracking - extended cooldown after repeated failures
// Consecutive failure tracking - extended cooldown after repeated failures
export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures || 3;
export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs || 60000; // 1 minute
// Gap 4: Capacity exhaustion - shorter retry for model capacity issues (not quota)
export const CAPACITY_RETRY_DELAY_MS = config?.capacityRetryDelayMs || 2000; // 2 seconds
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 3;
// Capacity exhaustion - progressive backoff tiers for model capacity issues
export const CAPACITY_BACKOFF_TIERS_MS = config?.capacityBackoffTiersMs || [5000, 10000, 20000, 30000, 60000];
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 5;
// Smart backoff by error type
export const BACKOFF_BY_ERROR_TYPE = {
RATE_LIMIT_EXCEEDED: 30000, // 30 seconds
MODEL_CAPACITY_EXHAUSTED: 15000, // 15 seconds
SERVER_ERROR: 20000, // 20 seconds
UNKNOWN: 60000 // 1 minute
};
// Progressive backoff tiers for QUOTA_EXHAUSTED (60s, 5m, 30m, 2h)
export const QUOTA_EXHAUSTED_BACKOFF_TIERS_MS = [60000, 300000, 1800000, 7200000];
// Minimum backoff floor to prevent "Available in 0s" loops (matches opencode-antigravity-auth)
export const MIN_BACKOFF_MS = 2000;
// Thinking model constants
export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length
@@ -258,10 +275,16 @@ export default {
MAX_ACCOUNTS,
MAX_WAIT_BEFORE_ERROR_MS,
RATE_LIMIT_DEDUP_WINDOW_MS,
RATE_LIMIT_STATE_RESET_MS,
FIRST_RETRY_DELAY_MS,
SWITCH_ACCOUNT_DELAY_MS,
MAX_CONSECUTIVE_FAILURES,
EXTENDED_COOLDOWN_MS,
CAPACITY_RETRY_DELAY_MS,
CAPACITY_BACKOFF_TIERS_MS,
MAX_CAPACITY_RETRIES,
BACKOFF_BY_ERROR_TYPE,
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
MIN_BACKOFF_MS,
MIN_SIGNATURE_LENGTH,
GEMINI_MAX_OUTPUT_TOKENS,
GEMINI_SKIP_SIGNATURE,

View File

@@ -397,7 +397,7 @@ export function mountWebUI(app, dirname, accountManager) {
*/
app.post('/api/config', (req, res) => {
try {
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, maxAccounts, accountSelection, rateLimitDedupWindowMs, maxConsecutiveFailures, extendedCooldownMs, capacityRetryDelayMs, maxCapacityRetries } = req.body;
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, maxAccounts, accountSelection, rateLimitDedupWindowMs, maxConsecutiveFailures, extendedCooldownMs, maxCapacityRetries } = req.body;
// Only allow updating specific fields (security)
const updates = {};
@@ -435,9 +435,6 @@ export function mountWebUI(app, dirname, accountManager) {
if (typeof extendedCooldownMs === 'number' && extendedCooldownMs >= 10000 && extendedCooldownMs <= 300000) {
updates.extendedCooldownMs = extendedCooldownMs;
}
if (typeof capacityRetryDelayMs === 'number' && capacityRetryDelayMs >= 500 && capacityRetryDelayMs <= 10000) {
updates.capacityRetryDelayMs = capacityRetryDelayMs;
}
if (typeof maxCapacityRetries === 'number' && maxCapacityRetries >= 1 && maxCapacityRetries <= 10) {
updates.maxCapacityRetries = maxCapacityRetries;
}

93
tests/stress-test.cjs Normal file
View File

@@ -0,0 +1,93 @@
/**
* Stress Test - Send multiple concurrent requests to test rate limit handling
*
* Usage: node tests/stress-test.cjs [count] [model]
* Example: node tests/stress-test.cjs 10 gemini-3-flash
*/
/**
 * Stress-test configuration, overridable via environment / CLI:
 * - BASE_URL: target proxy endpoint (ANTHROPIC_BASE_URL env var, default http://localhost:8080)
 * - count:    number of concurrent requests (argv[2], default 8)
 * - model:    model name to request (argv[3], default gemini-3-flash)
 */
const BASE_URL = process.env.ANTHROPIC_BASE_URL || 'http://localhost:8080';
// Always pass an explicit radix; NaN (missing or non-numeric argv) falls back to 8.
const count = Number.parseInt(process.argv[2], 10) || 8;
const model = process.argv[3] || 'gemini-3-flash';
/**
 * Fire one chat-completion request at the proxy and log the outcome.
 *
 * @param {number} id - 1-based request identifier, echoed in the prompt and log lines.
 * @returns {Promise<{id: number, success: boolean, status?: number, error?: string, elapsed: number}>}
 *   Outcome record; never rejects — network failures are caught and reported.
 */
async function sendRequest(id) {
  const startedAt = Date.now();
  try {
    const requestBody = {
      model: model,
      max_tokens: 100,
      messages: [
        { role: 'user', content: `Request ${id}: Say "Hello ${id}" and nothing else.` }
      ]
    };
    const response = await fetch(`${BASE_URL}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify(requestBody)
    });
    const elapsed = Date.now() - startedAt;
    if (response.ok) {
      const data = await response.json();
      // Truncate the reply for a one-line log entry.
      const text = data.content?.[0]?.text?.substring(0, 50) || 'No text';
      console.log(`[${id}] ✅ 200 after ${elapsed}ms: "${text}..."`);
      return { id, success: true, status: 200, elapsed };
    }
    const errorText = await response.text();
    console.log(`[${id}] ❌ ${response.status} after ${elapsed}ms: ${errorText.substring(0, 100)}`);
    return { id, success: false, status: response.status, elapsed };
  } catch (error) {
    // Transport-level failure (connection refused, DNS, abort, ...).
    const elapsed = Date.now() - startedAt;
    console.log(`[${id}] ❌ Error after ${elapsed}ms: ${error.message}`);
    return { id, success: false, error: error.message, elapsed };
  }
}
/**
 * Launch `count` concurrent requests against the proxy, wait for all of them,
 * then print a pass/fail summary with timing and an error-status breakdown.
 *
 * @returns {Promise<void>} Resolves once the summary has been printed.
 */
async function runStressTest() {
  console.log(`\n🚀 Stress Test: Sending ${count} concurrent requests to ${model}\n`);
  console.log(`Target: ${BASE_URL}/v1/messages\n`);
  console.log('─'.repeat(70));
  const suiteStart = Date.now();
  // Fire everything at once; ids are 1-based so they match the log lines.
  const results = await Promise.all(
    Array.from({ length: count }, (_, i) => sendRequest(i + 1))
  );
  const totalElapsed = Date.now() - suiteStart;
  console.log('─'.repeat(70));
  // Summary
  const successful = results.filter((r) => r.success).length;
  const failed = results.length - successful;
  const avgElapsed = Math.round(results.reduce((sum, r) => sum + r.elapsed, 0) / results.length);
  console.log(`\n📊 Summary:`);
  console.log(` Total time: ${totalElapsed}ms`);
  console.log(` Successful: ${successful}/${count}`);
  console.log(` Failed: ${failed}/${count}`);
  console.log(` Avg response time: ${avgElapsed}ms`);
  if (failed > 0) {
    // Tally failures by HTTP status; transport errors have no status.
    const statusCounts = {};
    for (const outcome of results) {
      if (outcome.success) continue;
      const key = outcome.status || 'network';
      statusCounts[key] = (statusCounts[key] || 0) + 1;
    }
    console.log(` Error breakdown: ${JSON.stringify(statusCounts)}`);
  }
  console.log('');
}
runStressTest().catch(console.error);