feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)

This commit addresses "Max retries exceeded" errors during stress testing, where all accounts would become exhausted simultaneously because short per-second rate limits triggered cascading failures.

## Rate Limit Parser (`rate-limit-parser.js`)
- Remove the 2s buffer enforcement that caused cascading failures when the API returned short reset times (200-600ms). Now adds a 200ms buffer for sub-500ms resets
- Add `parseRateLimitReason()` for smart backoff based on error type: QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR

## Message/Streaming Handlers
- Add per-account+model rate limit state tracking with exponential backoff
- For short rate limits (< 1 second), wait and retry on the same account instead of switching; this prevents a thundering herd when all accounts hit per-second limits
- Add throttle wait support for fallback modes (emergency/lastResort)
- Add `calculateSmartBackoff()` with progressive tiers by error type

## HybridStrategy (`hybrid-strategy.js`)
- Refactor `#getCandidates()` to return 4 fallback levels:
  - `normal`: All filters pass (health, tokens, quota)
  - `quota`: Bypass critical quota check
  - `emergency`: Bypass health check when ALL accounts are unhealthy
  - `lastResort`: Bypass BOTH health AND token bucket checks
- Add throttle wait times: 500ms for lastResort, 250ms for emergency
- Fix LRU calculation to use seconds (matches opencode-antigravity-auth)

## Health Tracker
- Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours)

## Account Manager
- Add consecutive failure tracking: `getConsecutiveFailures()`, `incrementConsecutiveFailures()`, `resetConsecutiveFailures()`
- Add cooldown mechanism separate from rate limits with `CooldownReason`
- Reset consecutive failures on successful request

## Base Strategy
- Add `isAccountCoolingDown()` check in `isAccountUsable()`

## Constants
- Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS`
- Add `BACKOFF_BY_ERROR_TYPE` for smart backoff
- Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff
- Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops
- Increase `MAX_CAPACITY_RETRIES` from 3 to 5
- Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s

## Frontend
- Remove `capacityRetryDelayMs` config (replaced by progressive tiers)
- Update default `maxCapacityRetries` display from 3 to 5

## Testing
- Add `tests/stress-test.cjs` for concurrent request stress testing

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-24 22:43:53 +05:30
parent 71b9b001fd
commit 5a85f0cfcc
20 changed files with 869 additions and 244 deletions
--- a/public/js/components/server-config.js
+++ b/public/js/components/server-config.js
@@ -274,12 +274,6 @@ window.Components.serverConfig = () => ({
            (v) => window.Validators.validateTimeout(v, EXTENDED_COOLDOWN_MIN, EXTENDED_COOLDOWN_MAX));
    },

-    toggleCapacityRetryDelayMs(value) {
-        const { CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX } = window.AppConstants.VALIDATION;
-        this.saveConfigField('capacityRetryDelayMs', value, 'Capacity Retry Delay',
-            (v) => window.Validators.validateTimeout(v, CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX));
-    },
-
    toggleMaxCapacityRetries(value) {
        const { MAX_CAPACITY_RETRIES_MIN, MAX_CAPACITY_RETRIES_MAX } = window.AppConstants.VALIDATION;
        this.saveConfigField('maxCapacityRetries', value, 'Max Capacity Retries',
--- a/public/js/config/constants.js
+++ b/public/js/config/constants.js
@@ -85,10 +85,6 @@ window.AppConstants.VALIDATION = {
    EXTENDED_COOLDOWN_MIN: 10000,
    EXTENDED_COOLDOWN_MAX: 300000,

-    // Capacity retry delay (500ms - 10 seconds)
-    CAPACITY_RETRY_DELAY_MIN: 500,
-    CAPACITY_RETRY_DELAY_MAX: 10000,
-
    // Capacity retries (1 - 10)
    MAX_CAPACITY_RETRIES_MIN: 1,
    MAX_CAPACITY_RETRIES_MAX: 10
--- a/public/js/translations/en.js
+++ b/public/js/translations/en.js
@@ -245,8 +245,6 @@ window.translations.en = {
    maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
    extendedCooldown: "Extended Cooldown",
    extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
-    capacityRetryDelay: "Capacity Retry Delay",
-    capacityRetryDelayDesc: "Delay before retrying when model capacity is exhausted (not quota).",
    maxCapacityRetries: "Max Capacity Retries",
    maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
    saveConfigServer: "Save Configuration",
--- a/public/js/translations/id.js
+++ b/public/js/translations/id.js
@@ -278,8 +278,6 @@ window.translations.id = {
    maxConsecutiveFailuresDesc: "Jumlah kegagalan berturut-turut sebelum menerapkan cooldown diperpanjang.",
    extendedCooldown: "Cooldown Diperpanjang",
    extendedCooldownDesc: "Durasi cooldown setelah mencapai maks. kegagalan berturut-turut.",
-    capacityRetryDelay: "Jeda Retry Kapasitas",
-    capacityRetryDelayDesc: "Jeda sebelum retry saat kapasitas model habis (bukan kuota).",
    maxCapacityRetries: "Maks. Retry Kapasitas",
    maxCapacityRetriesDesc: "Maksimum retry untuk kehabisan kapasitas sebelum ganti akun.",
    saveConfigServer: "Simpan Konfigurasi",
--- a/public/js/translations/pt.js
+++ b/public/js/translations/pt.js
@@ -223,8 +223,6 @@ window.translations.pt = {
    maxConsecutiveFailuresDesc: "Número de falhas consecutivas antes de aplicar resfriamento estendido.",
    extendedCooldown: "Resfriamento Estendido",
    extendedCooldownDesc: "Duração do resfriamento aplicado após atingir máx. de falhas consecutivas.",
-    capacityRetryDelay: "Atraso de Retry de Capacidade",
-    capacityRetryDelayDesc: "Atraso antes de tentar novamente quando capacidade do modelo está esgotada (não quota).",
    maxCapacityRetries: "Máx. Retries de Capacidade",
    maxCapacityRetriesDesc: "Máximo de retries para esgotamento de capacidade antes de trocar conta.",
    saveConfigServer: "Salvar Configuração",
--- a/public/js/translations/tr.js
+++ b/public/js/translations/tr.js
@@ -227,8 +227,6 @@ window.translations.tr = {
    maxConsecutiveFailuresDesc: "Uzatılmış soğuma uygulamadan önce ardışık başarısızlık sayısı.",
    extendedCooldown: "Uzatılmış Soğuma",
    extendedCooldownDesc: "Maks. ardışık başarısızlık sonrası uygulanan soğuma süresi.",
-    capacityRetryDelay: "Kapasite Yeniden Deneme Gecikmesi",
-    capacityRetryDelayDesc: "Model kapasitesi tükendiğinde (kota değil) yeniden denemeden önceki gecikme.",
    maxCapacityRetries: "Maks. Kapasite Yeniden Denemesi",
    maxCapacityRetriesDesc: "Hesap değiştirmeden önce kapasite tükenmesi için maksimum yeniden deneme.",
    saveConfigServer: "Yapılandırmayı Kaydet",
--- a/public/js/translations/zh.js
+++ b/public/js/translations/zh.js
@@ -245,8 +245,6 @@ window.translations.zh = {
    maxConsecutiveFailuresDesc: "触发扩展冷却前允许的连续失败次数。",
    extendedCooldown: "扩展冷却时间",
    extendedCooldownDesc: "达到最大连续失败后应用的冷却时长。",
-    capacityRetryDelay: "容量重试延迟",
-    capacityRetryDelayDesc: "模型容量耗尽（非配额）时重试前的延迟。",
    maxCapacityRetries: "最大容量重试次数",
    maxCapacityRetriesDesc: "容量耗尽时在切换账号前的最大重试次数。",
    saveConfigServer: "保存配置",