feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)
This commit addresses "Max retries exceeded" errors during stress testing where all accounts would become exhausted simultaneously due to short per-second rate limits triggering cascading failures.

## Rate Limit Parser (`rate-limit-parser.js`)
- Remove 2s buffer enforcement that caused cascading failures when API returned short reset times (200-600ms). Now adds 200ms buffer for sub-500ms resets
- Add `parseRateLimitReason()` for smart backoff based on error type: QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR

## Message/Streaming Handlers
- Add per-account+model rate limit state tracking with exponential backoff
- For short rate limits (< 1 second), wait and retry on same account instead of switching - prevents thundering herd when all accounts hit per-second limits
- Add throttle wait support for fallback modes (emergency/lastResort)
- Add `calculateSmartBackoff()` with progressive tiers by error type

## HybridStrategy (`hybrid-strategy.js`)
- Refactor `#getCandidates()` to return 4 fallback levels:
  - `normal`: All filters pass (health, tokens, quota)
  - `quota`: Bypass critical quota check
  - `emergency`: Bypass health check when ALL accounts unhealthy
  - `lastResort`: Bypass BOTH health AND token bucket checks
- Add throttle wait times: 500ms for lastResort, 250ms for emergency
- Fix LRU calculation to use seconds (matches opencode-antigravity-auth)

## Health Tracker
- Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours)

## Account Manager
- Add consecutive failure tracking: `getConsecutiveFailures()`, `incrementConsecutiveFailures()`, `resetConsecutiveFailures()`
- Add cooldown mechanism separate from rate limits with `CooldownReason`
- Reset consecutive failures on successful request

## Base Strategy
- Add `isAccountCoolingDown()` check in `isAccountUsable()`

## Constants
- Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS`
- Add `BACKOFF_BY_ERROR_TYPE` for smart backoff
- Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff
- Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops
- Increase `MAX_CAPACITY_RETRIES` from 3 to 5
- Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s

## Frontend
- Remove `capacityRetryDelayMs` config (replaced by progressive tiers)
- Update default `maxCapacityRetries` display from 3 to 5

## Testing
- Add `tests/stress-test.cjs` for concurrent request stress testing

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -15,7 +15,15 @@ import {
|
||||
markRateLimited as markLimited,
|
||||
markInvalid as markAccountInvalid,
|
||||
getMinWaitTimeMs as getMinWait,
|
||||
getRateLimitInfo as getLimitInfo
|
||||
getRateLimitInfo as getLimitInfo,
|
||||
getConsecutiveFailures as getFailures,
|
||||
resetConsecutiveFailures as resetFailures,
|
||||
incrementConsecutiveFailures as incrementFailures,
|
||||
markAccountCoolingDown as markCoolingDown,
|
||||
isAccountCoolingDown as checkCoolingDown,
|
||||
clearAccountCooldown as clearCooldown,
|
||||
getCooldownRemaining as getCooldownMs,
|
||||
CooldownReason
|
||||
} from './rate-limits.js';
|
||||
import {
|
||||
getTokenForAccount as fetchToken,
|
||||
@@ -182,6 +190,10 @@ export class AccountManager {
|
||||
if (this.#strategy) {
|
||||
this.#strategy.onSuccess(account, modelId);
|
||||
}
|
||||
// Reset consecutive failures on success (matches opencode-antigravity-auth)
|
||||
if (account?.email) {
|
||||
resetFailures(this.#accounts, account.email);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -206,6 +218,26 @@ export class AccountManager {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the consecutive failure count for an account
|
||||
* Used for progressive backoff calculation
|
||||
* @param {string} email - Account email
|
||||
* @returns {number} Number of consecutive failures
|
||||
*/
|
||||
getConsecutiveFailures(email) {
|
||||
return getFailures(this.#accounts, email);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the consecutive failure count without marking as rate limited
|
||||
* Used for quick retries to track failures while staying on same account
|
||||
* @param {string} email - Account email
|
||||
* @returns {number} New consecutive failure count
|
||||
*/
|
||||
incrementConsecutiveFailures(email) {
|
||||
return incrementFailures(this.#accounts, email);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current strategy name
|
||||
* @returns {string} Strategy name
|
||||
@@ -275,6 +307,52 @@ export class AccountManager {
|
||||
return getLimitInfo(this.#accounts, email, modelId);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Cooldown Methods (matches opencode-antigravity-auth)
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Mark an account as cooling down for a specified duration
|
||||
* Used for temporary backoff separate from rate limits
|
||||
* @param {string} email - Email of the account
|
||||
* @param {number} cooldownMs - Duration of cooldown in milliseconds
|
||||
* @param {string} [reason] - Reason for the cooldown (use CooldownReason constants)
|
||||
*/
|
||||
markAccountCoolingDown(email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
|
||||
markCoolingDown(this.#accounts, email, cooldownMs, reason);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an account is currently cooling down
|
||||
* @param {string} email - Email of the account
|
||||
* @returns {boolean} True if account is cooling down
|
||||
*/
|
||||
isAccountCoolingDown(email) {
|
||||
const account = this.#accounts.find(a => a.email === email);
|
||||
return account ? checkCoolingDown(account) : false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the cooldown for an account
|
||||
* @param {string} email - Email of the account
|
||||
*/
|
||||
clearAccountCooldown(email) {
|
||||
const account = this.#accounts.find(a => a.email === email);
|
||||
if (account) {
|
||||
clearCooldown(account);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get time remaining until cooldown expires for an account
|
||||
* @param {string} email - Email of the account
|
||||
* @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
|
||||
*/
|
||||
getCooldownRemaining(email) {
|
||||
const account = this.#accounts.find(a => a.email === email);
|
||||
return account ? getCooldownMs(account) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get OAuth token for an account
|
||||
* @param {Object} account - Account object with email and credentials
|
||||
@@ -378,4 +456,7 @@ export class AccountManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Re-export CooldownReason for use by handlers
|
||||
export { CooldownReason };
|
||||
|
||||
export default AccountManager;
|
||||
|
||||
Reference in New Issue
Block a user