feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)

This commit addresses "Max retries exceeded" errors during stress testing where
all accounts would become exhausted simultaneously due to short per-second rate
limits triggering cascading failures.

## Rate Limit Parser (`rate-limit-parser.js`)
- Remove the 2s buffer enforcement that caused cascading failures when the API
  returned short reset times (200-600ms). It now adds a 200ms buffer for sub-500ms resets.
- Add `parseRateLimitReason()` for smart backoff based on error type:
  QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR

## Message/Streaming Handlers
- Add per-account+model rate limit state tracking with exponential backoff
- For short rate limits (< 1 second), wait and retry on the same account instead
  of switching; this prevents a thundering herd when all accounts hit per-second limits
- Add throttle wait support for fallback modes (emergency/lastResort)
- Add `calculateSmartBackoff()` with progressive tiers by error type

## HybridStrategy (`hybrid-strategy.js`)
- Refactor `#getCandidates()` to return 4 fallback levels:
  - `normal`: All filters pass (health, tokens, quota)
  - `quota`: Bypass critical quota check
  - `emergency`: Bypass health check when ALL accounts unhealthy
  - `lastResort`: Bypass BOTH health AND token bucket checks
- Add throttle wait times: 500ms for lastResort, 250ms for emergency
- Fix LRU calculation to use seconds (matches opencode-antigravity-auth)

## Health Tracker
- Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours)

## Account Manager
- Add consecutive failure tracking: `getConsecutiveFailures()`,
  `incrementConsecutiveFailures()`, `resetConsecutiveFailures()`
- Add cooldown mechanism separate from rate limits with `CooldownReason`
- Reset consecutive failures on successful request

## Base Strategy
- Add `isAccountCoolingDown()` check in `isAccountUsable()`

## Constants
- Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS`
- Add `BACKOFF_BY_ERROR_TYPE` for smart backoff
- Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff
- Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops
- Increase `MAX_CAPACITY_RETRIES` from 3 to 5
- Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s

## Frontend
- Remove `capacityRetryDelayMs` config (replaced by progressive tiers)
- Update default `maxCapacityRetries` display from 3 to 5

## Testing
- Add `tests/stress-test.cjs` for concurrent request stress testing

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-24 22:43:53 +05:30
parent 71b9b001fd
commit 5a85f0cfcc
20 changed files with 869 additions and 244 deletions

View File

@@ -15,7 +15,15 @@ import {
markRateLimited as markLimited,
markInvalid as markAccountInvalid,
getMinWaitTimeMs as getMinWait,
getRateLimitInfo as getLimitInfo
getRateLimitInfo as getLimitInfo,
getConsecutiveFailures as getFailures,
resetConsecutiveFailures as resetFailures,
incrementConsecutiveFailures as incrementFailures,
markAccountCoolingDown as markCoolingDown,
isAccountCoolingDown as checkCoolingDown,
clearAccountCooldown as clearCooldown,
getCooldownRemaining as getCooldownMs,
CooldownReason
} from './rate-limits.js';
import {
getTokenForAccount as fetchToken,
@@ -182,6 +190,10 @@ export class AccountManager {
if (this.#strategy) {
this.#strategy.onSuccess(account, modelId);
}
// Reset consecutive failures on success (matches opencode-antigravity-auth)
if (account?.email) {
resetFailures(this.#accounts, account.email);
}
}
/**
@@ -206,6 +218,26 @@ export class AccountManager {
}
}
/**
* Get the consecutive failure count for an account
* Used for progressive backoff calculation
* @param {string} email - Account email
* @returns {number} Number of consecutive failures
*/
getConsecutiveFailures(email) {
return getFailures(this.#accounts, email);
}
/**
* Increment the consecutive failure count without marking as rate limited
* Used for quick retries to track failures while staying on same account
* @param {string} email - Account email
* @returns {number} New consecutive failure count
*/
incrementConsecutiveFailures(email) {
return incrementFailures(this.#accounts, email);
}
/**
* Get the current strategy name
* @returns {string} Strategy name
@@ -275,6 +307,52 @@ export class AccountManager {
return getLimitInfo(this.#accounts, email, modelId);
}
// ============================================================================
// Cooldown Methods (matches opencode-antigravity-auth)
// ============================================================================
/**
* Mark an account as cooling down for a specified duration
* Used for temporary backoff separate from rate limits
* @param {string} email - Email of the account
* @param {number} cooldownMs - Duration of cooldown in milliseconds
* @param {string} [reason] - Reason for the cooldown (use CooldownReason constants)
*/
markAccountCoolingDown(email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
markCoolingDown(this.#accounts, email, cooldownMs, reason);
}
/**
* Check if an account is currently cooling down
* @param {string} email - Email of the account
* @returns {boolean} True if account is cooling down
*/
isAccountCoolingDown(email) {
const account = this.#accounts.find(a => a.email === email);
return account ? checkCoolingDown(account) : false;
}
/**
* Clear the cooldown for an account
* @param {string} email - Email of the account
*/
clearAccountCooldown(email) {
const account = this.#accounts.find(a => a.email === email);
if (account) {
clearCooldown(account);
}
}
/**
* Get time remaining until cooldown expires for an account
* @param {string} email - Email of the account
* @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
*/
getCooldownRemaining(email) {
const account = this.#accounts.find(a => a.email === email);
return account ? getCooldownMs(account) : 0;
}
/**
* Get OAuth token for an account
* @param {Object} account - Account object with email and credentials
@@ -378,4 +456,7 @@ export class AccountManager {
}
}
// Re-export CooldownReason (imported above from './rate-limits.js') for use by handlers
export { CooldownReason };
// Default export of the AccountManager class
export default AccountManager;