feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)
This commit addresses "Max retries exceeded" errors during stress testing where all accounts would become exhausted simultaneously due to short per-second rate limits triggering cascading failures. ## Rate Limit Parser (`rate-limit-parser.js`) - Remove 2s buffer enforcement that caused cascading failures when API returned short reset times (200-600ms). Now adds 200ms buffer for sub-500ms resets - Add `parseRateLimitReason()` for smart backoff based on error type: QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR ## Message/Streaming Handlers - Add per-account+model rate limit state tracking with exponential backoff - For short rate limits (< 1 second), wait and retry on same account instead of switching - prevents thundering herd when all accounts hit per-second limits - Add throttle wait support for fallback modes (emergency/lastResort) - Add `calculateSmartBackoff()` with progressive tiers by error type ## HybridStrategy (`hybrid-strategy.js`) - Refactor `#getCandidates()` to return 4 fallback levels: - `normal`: All filters pass (health, tokens, quota) - `quota`: Bypass critical quota check - `emergency`: Bypass health check when ALL accounts unhealthy - `lastResort`: Bypass BOTH health AND token bucket checks - Add throttle wait times: 500ms for lastResort, 250ms for emergency - Fix LRU calculation to use seconds (matches opencode-antigravity-auth) ## Health Tracker - Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours) ## Account Manager - Add consecutive failure tracking: `getConsecutiveFailures()`, `incrementConsecutiveFailures()`, `resetConsecutiveFailures()` - Add cooldown mechanism separate from rate limits with `CooldownReason` - Reset consecutive failures on successful request ## Base Strategy - Add `isAccountCoolingDown()` check in `isAccountUsable()` ## Constants - Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS` - Add `BACKOFF_BY_ERROR_TYPE` for smart backoff - 
Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff - Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops - Increase `MAX_CAPACITY_RETRIES` from 3 to 5 - Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s ## Frontend - Remove `capacityRetryDelayMs` config (replaced by progressive tiers) - Update default `maxCapacityRetries` display from 3 to 5 ## Testing - Add `tests/stress-test.cjs` for concurrent request stress testing Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -274,12 +274,6 @@ window.Components.serverConfig = () => ({
|
|||||||
(v) => window.Validators.validateTimeout(v, EXTENDED_COOLDOWN_MIN, EXTENDED_COOLDOWN_MAX));
|
(v) => window.Validators.validateTimeout(v, EXTENDED_COOLDOWN_MIN, EXTENDED_COOLDOWN_MAX));
|
||||||
},
|
},
|
||||||
|
|
||||||
toggleCapacityRetryDelayMs(value) {
|
|
||||||
const { CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX } = window.AppConstants.VALIDATION;
|
|
||||||
this.saveConfigField('capacityRetryDelayMs', value, 'Capacity Retry Delay',
|
|
||||||
(v) => window.Validators.validateTimeout(v, CAPACITY_RETRY_DELAY_MIN, CAPACITY_RETRY_DELAY_MAX));
|
|
||||||
},
|
|
||||||
|
|
||||||
toggleMaxCapacityRetries(value) {
|
toggleMaxCapacityRetries(value) {
|
||||||
const { MAX_CAPACITY_RETRIES_MIN, MAX_CAPACITY_RETRIES_MAX } = window.AppConstants.VALIDATION;
|
const { MAX_CAPACITY_RETRIES_MIN, MAX_CAPACITY_RETRIES_MAX } = window.AppConstants.VALIDATION;
|
||||||
this.saveConfigField('maxCapacityRetries', value, 'Max Capacity Retries',
|
this.saveConfigField('maxCapacityRetries', value, 'Max Capacity Retries',
|
||||||
|
|||||||
@@ -85,10 +85,6 @@ window.AppConstants.VALIDATION = {
|
|||||||
EXTENDED_COOLDOWN_MIN: 10000,
|
EXTENDED_COOLDOWN_MIN: 10000,
|
||||||
EXTENDED_COOLDOWN_MAX: 300000,
|
EXTENDED_COOLDOWN_MAX: 300000,
|
||||||
|
|
||||||
// Capacity retry delay (500ms - 10 seconds)
|
|
||||||
CAPACITY_RETRY_DELAY_MIN: 500,
|
|
||||||
CAPACITY_RETRY_DELAY_MAX: 10000,
|
|
||||||
|
|
||||||
// Capacity retries (1 - 10)
|
// Capacity retries (1 - 10)
|
||||||
MAX_CAPACITY_RETRIES_MIN: 1,
|
MAX_CAPACITY_RETRIES_MIN: 1,
|
||||||
MAX_CAPACITY_RETRIES_MAX: 10
|
MAX_CAPACITY_RETRIES_MAX: 10
|
||||||
|
|||||||
@@ -245,8 +245,6 @@ window.translations.en = {
|
|||||||
maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
|
maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
|
||||||
extendedCooldown: "Extended Cooldown",
|
extendedCooldown: "Extended Cooldown",
|
||||||
extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
|
extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
|
||||||
capacityRetryDelay: "Capacity Retry Delay",
|
|
||||||
capacityRetryDelayDesc: "Delay before retrying when model capacity is exhausted (not quota).",
|
|
||||||
maxCapacityRetries: "Max Capacity Retries",
|
maxCapacityRetries: "Max Capacity Retries",
|
||||||
maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
|
maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
|
||||||
saveConfigServer: "Save Configuration",
|
saveConfigServer: "Save Configuration",
|
||||||
|
|||||||
@@ -278,8 +278,6 @@ window.translations.id = {
|
|||||||
maxConsecutiveFailuresDesc: "Jumlah kegagalan berturut-turut sebelum menerapkan cooldown diperpanjang.",
|
maxConsecutiveFailuresDesc: "Jumlah kegagalan berturut-turut sebelum menerapkan cooldown diperpanjang.",
|
||||||
extendedCooldown: "Cooldown Diperpanjang",
|
extendedCooldown: "Cooldown Diperpanjang",
|
||||||
extendedCooldownDesc: "Durasi cooldown setelah mencapai maks. kegagalan berturut-turut.",
|
extendedCooldownDesc: "Durasi cooldown setelah mencapai maks. kegagalan berturut-turut.",
|
||||||
capacityRetryDelay: "Jeda Retry Kapasitas",
|
|
||||||
capacityRetryDelayDesc: "Jeda sebelum retry saat kapasitas model habis (bukan kuota).",
|
|
||||||
maxCapacityRetries: "Maks. Retry Kapasitas",
|
maxCapacityRetries: "Maks. Retry Kapasitas",
|
||||||
maxCapacityRetriesDesc: "Maksimum retry untuk kehabisan kapasitas sebelum ganti akun.",
|
maxCapacityRetriesDesc: "Maksimum retry untuk kehabisan kapasitas sebelum ganti akun.",
|
||||||
saveConfigServer: "Simpan Konfigurasi",
|
saveConfigServer: "Simpan Konfigurasi",
|
||||||
|
|||||||
@@ -223,8 +223,6 @@ window.translations.pt = {
|
|||||||
maxConsecutiveFailuresDesc: "Número de falhas consecutivas antes de aplicar resfriamento estendido.",
|
maxConsecutiveFailuresDesc: "Número de falhas consecutivas antes de aplicar resfriamento estendido.",
|
||||||
extendedCooldown: "Resfriamento Estendido",
|
extendedCooldown: "Resfriamento Estendido",
|
||||||
extendedCooldownDesc: "Duração do resfriamento aplicado após atingir máx. de falhas consecutivas.",
|
extendedCooldownDesc: "Duração do resfriamento aplicado após atingir máx. de falhas consecutivas.",
|
||||||
capacityRetryDelay: "Atraso de Retry de Capacidade",
|
|
||||||
capacityRetryDelayDesc: "Atraso antes de tentar novamente quando capacidade do modelo está esgotada (não quota).",
|
|
||||||
maxCapacityRetries: "Máx. Retries de Capacidade",
|
maxCapacityRetries: "Máx. Retries de Capacidade",
|
||||||
maxCapacityRetriesDesc: "Máximo de retries para esgotamento de capacidade antes de trocar conta.",
|
maxCapacityRetriesDesc: "Máximo de retries para esgotamento de capacidade antes de trocar conta.",
|
||||||
saveConfigServer: "Salvar Configuração",
|
saveConfigServer: "Salvar Configuração",
|
||||||
|
|||||||
@@ -227,8 +227,6 @@ window.translations.tr = {
|
|||||||
maxConsecutiveFailuresDesc: "Uzatılmış soğuma uygulamadan önce ardışık başarısızlık sayısı.",
|
maxConsecutiveFailuresDesc: "Uzatılmış soğuma uygulamadan önce ardışık başarısızlık sayısı.",
|
||||||
extendedCooldown: "Uzatılmış Soğuma",
|
extendedCooldown: "Uzatılmış Soğuma",
|
||||||
extendedCooldownDesc: "Maks. ardışık başarısızlık sonrası uygulanan soğuma süresi.",
|
extendedCooldownDesc: "Maks. ardışık başarısızlık sonrası uygulanan soğuma süresi.",
|
||||||
capacityRetryDelay: "Kapasite Yeniden Deneme Gecikmesi",
|
|
||||||
capacityRetryDelayDesc: "Model kapasitesi tükendiğinde (kota değil) yeniden denemeden önceki gecikme.",
|
|
||||||
maxCapacityRetries: "Maks. Kapasite Yeniden Denemesi",
|
maxCapacityRetries: "Maks. Kapasite Yeniden Denemesi",
|
||||||
maxCapacityRetriesDesc: "Hesap değiştirmeden önce kapasite tükenmesi için maksimum yeniden deneme.",
|
maxCapacityRetriesDesc: "Hesap değiştirmeden önce kapasite tükenmesi için maksimum yeniden deneme.",
|
||||||
saveConfigServer: "Yapılandırmayı Kaydet",
|
saveConfigServer: "Yapılandırmayı Kaydet",
|
||||||
|
|||||||
@@ -245,8 +245,6 @@ window.translations.zh = {
|
|||||||
maxConsecutiveFailuresDesc: "触发扩展冷却前允许的连续失败次数。",
|
maxConsecutiveFailuresDesc: "触发扩展冷却前允许的连续失败次数。",
|
||||||
extendedCooldown: "扩展冷却时间",
|
extendedCooldown: "扩展冷却时间",
|
||||||
extendedCooldownDesc: "达到最大连续失败后应用的冷却时长。",
|
extendedCooldownDesc: "达到最大连续失败后应用的冷却时长。",
|
||||||
capacityRetryDelay: "容量重试延迟",
|
|
||||||
capacityRetryDelayDesc: "模型容量耗尽(非配额)时重试前的延迟。",
|
|
||||||
maxCapacityRetries: "最大容量重试次数",
|
maxCapacityRetries: "最大容量重试次数",
|
||||||
maxCapacityRetriesDesc: "容量耗尽时在切换账号前的最大重试次数。",
|
maxCapacityRetriesDesc: "容量耗尽时在切换账号前的最大重试次数。",
|
||||||
saveConfigServer: "保存配置",
|
saveConfigServer: "保存配置",
|
||||||
|
|||||||
@@ -1226,47 +1226,23 @@
|
|||||||
x-text="$store.global.t('extendedCooldownDesc')">Applied after max consecutive failures.</p>
|
x-text="$store.global.t('extendedCooldownDesc')">Applied after max consecutive failures.</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="form-control">
|
|
||||||
<label class="label pt-0">
|
|
||||||
<span class="label-text text-gray-400 text-xs"
|
|
||||||
x-text="$store.global.t('capacityRetryDelay')">Capacity Retry Delay</span>
|
|
||||||
<span class="label-text-alt font-mono text-neon-cyan text-xs font-semibold"
|
|
||||||
x-text="Math.round((serverConfig.capacityRetryDelayMs || 2000) / 1000) + 's'"></span>
|
|
||||||
</label>
|
|
||||||
<div class="flex gap-3 items-center">
|
|
||||||
<input type="range" min="500" max="10000" step="500"
|
|
||||||
class="custom-range custom-range-cyan flex-1"
|
|
||||||
:value="serverConfig.capacityRetryDelayMs || 2000"
|
|
||||||
:style="`background-size: ${((serverConfig.capacityRetryDelayMs || 2000) - 500) / 95}% 100%`"
|
|
||||||
@input="toggleCapacityRetryDelayMs($event.target.value)"
|
|
||||||
aria-label="Capacity retry delay slider">
|
|
||||||
<input type="number" min="500" max="10000" step="500"
|
|
||||||
class="input input-xs input-bordered w-20 bg-space-800 border-space-border text-white font-mono text-center"
|
|
||||||
:value="serverConfig.capacityRetryDelayMs || 2000"
|
|
||||||
@change="toggleCapacityRetryDelayMs($event.target.value)"
|
|
||||||
aria-label="Capacity retry delay value">
|
|
||||||
</div>
|
|
||||||
<p class="text-[9px] text-gray-600 mt-1 leading-tight"
|
|
||||||
x-text="$store.global.t('capacityRetryDelayDesc')">Delay for capacity (not quota) issues.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="form-control">
|
<div class="form-control">
|
||||||
<label class="label pt-0">
|
<label class="label pt-0">
|
||||||
<span class="label-text text-gray-400 text-xs"
|
<span class="label-text text-gray-400 text-xs"
|
||||||
x-text="$store.global.t('maxCapacityRetries')">Max Capacity Retries</span>
|
x-text="$store.global.t('maxCapacityRetries')">Max Capacity Retries</span>
|
||||||
<span class="label-text-alt font-mono text-neon-cyan text-xs font-semibold"
|
<span class="label-text-alt font-mono text-neon-cyan text-xs font-semibold"
|
||||||
x-text="serverConfig.maxCapacityRetries || 3"></span>
|
x-text="serverConfig.maxCapacityRetries || 5"></span>
|
||||||
</label>
|
</label>
|
||||||
<div class="flex gap-3 items-center">
|
<div class="flex gap-3 items-center">
|
||||||
<input type="range" min="1" max="10" step="1"
|
<input type="range" min="1" max="10" step="1"
|
||||||
class="custom-range custom-range-cyan flex-1"
|
class="custom-range custom-range-cyan flex-1"
|
||||||
:value="serverConfig.maxCapacityRetries || 3"
|
:value="serverConfig.maxCapacityRetries || 5"
|
||||||
:style="`background-size: ${((serverConfig.maxCapacityRetries || 3) - 1) / 0.09}% 100%`"
|
:style="`background-size: ${((serverConfig.maxCapacityRetries || 5) - 1) / 0.09}% 100%`"
|
||||||
@input="toggleMaxCapacityRetries($event.target.value)"
|
@input="toggleMaxCapacityRetries($event.target.value)"
|
||||||
aria-label="Max capacity retries slider">
|
aria-label="Max capacity retries slider">
|
||||||
<input type="number" min="1" max="10" step="1"
|
<input type="number" min="1" max="10" step="1"
|
||||||
class="input input-xs input-bordered w-16 bg-space-800 border-space-border text-white font-mono text-center"
|
class="input input-xs input-bordered w-16 bg-space-800 border-space-border text-white font-mono text-center"
|
||||||
:value="serverConfig.maxCapacityRetries || 3"
|
:value="serverConfig.maxCapacityRetries || 5"
|
||||||
@change="toggleMaxCapacityRetries($event.target.value)"
|
@change="toggleMaxCapacityRetries($event.target.value)"
|
||||||
aria-label="Max capacity retries value">
|
aria-label="Max capacity retries value">
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -15,7 +15,15 @@ import {
|
|||||||
markRateLimited as markLimited,
|
markRateLimited as markLimited,
|
||||||
markInvalid as markAccountInvalid,
|
markInvalid as markAccountInvalid,
|
||||||
getMinWaitTimeMs as getMinWait,
|
getMinWaitTimeMs as getMinWait,
|
||||||
getRateLimitInfo as getLimitInfo
|
getRateLimitInfo as getLimitInfo,
|
||||||
|
getConsecutiveFailures as getFailures,
|
||||||
|
resetConsecutiveFailures as resetFailures,
|
||||||
|
incrementConsecutiveFailures as incrementFailures,
|
||||||
|
markAccountCoolingDown as markCoolingDown,
|
||||||
|
isAccountCoolingDown as checkCoolingDown,
|
||||||
|
clearAccountCooldown as clearCooldown,
|
||||||
|
getCooldownRemaining as getCooldownMs,
|
||||||
|
CooldownReason
|
||||||
} from './rate-limits.js';
|
} from './rate-limits.js';
|
||||||
import {
|
import {
|
||||||
getTokenForAccount as fetchToken,
|
getTokenForAccount as fetchToken,
|
||||||
@@ -182,6 +190,10 @@ export class AccountManager {
|
|||||||
if (this.#strategy) {
|
if (this.#strategy) {
|
||||||
this.#strategy.onSuccess(account, modelId);
|
this.#strategy.onSuccess(account, modelId);
|
||||||
}
|
}
|
||||||
|
// Reset consecutive failures on success (matches opencode-antigravity-auth)
|
||||||
|
if (account?.email) {
|
||||||
|
resetFailures(this.#accounts, account.email);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -206,6 +218,26 @@ export class AccountManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the consecutive failure count for an account
|
||||||
|
* Used for progressive backoff calculation
|
||||||
|
* @param {string} email - Account email
|
||||||
|
* @returns {number} Number of consecutive failures
|
||||||
|
*/
|
||||||
|
getConsecutiveFailures(email) {
|
||||||
|
return getFailures(this.#accounts, email);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the consecutive failure count without marking as rate limited
|
||||||
|
* Used for quick retries to track failures while staying on same account
|
||||||
|
* @param {string} email - Account email
|
||||||
|
* @returns {number} New consecutive failure count
|
||||||
|
*/
|
||||||
|
incrementConsecutiveFailures(email) {
|
||||||
|
return incrementFailures(this.#accounts, email);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the current strategy name
|
* Get the current strategy name
|
||||||
* @returns {string} Strategy name
|
* @returns {string} Strategy name
|
||||||
@@ -275,6 +307,52 @@ export class AccountManager {
|
|||||||
return getLimitInfo(this.#accounts, email, modelId);
|
return getLimitInfo(this.#accounts, email, modelId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cooldown Methods (matches opencode-antigravity-auth)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark an account as cooling down for a specified duration
|
||||||
|
* Used for temporary backoff separate from rate limits
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @param {number} cooldownMs - Duration of cooldown in milliseconds
|
||||||
|
* @param {string} [reason] - Reason for the cooldown (use CooldownReason constants)
|
||||||
|
*/
|
||||||
|
markAccountCoolingDown(email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
|
||||||
|
markCoolingDown(this.#accounts, email, cooldownMs, reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if an account is currently cooling down
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @returns {boolean} True if account is cooling down
|
||||||
|
*/
|
||||||
|
isAccountCoolingDown(email) {
|
||||||
|
const account = this.#accounts.find(a => a.email === email);
|
||||||
|
return account ? checkCoolingDown(account) : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear the cooldown for an account
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
*/
|
||||||
|
clearAccountCooldown(email) {
|
||||||
|
const account = this.#accounts.find(a => a.email === email);
|
||||||
|
if (account) {
|
||||||
|
clearCooldown(account);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get time remaining until cooldown expires for an account
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
|
||||||
|
*/
|
||||||
|
getCooldownRemaining(email) {
|
||||||
|
const account = this.#accounts.find(a => a.email === email);
|
||||||
|
return account ? getCooldownMs(account) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get OAuth token for an account
|
* Get OAuth token for an account
|
||||||
* @param {Object} account - Account object with email and credentials
|
* @param {Object} account - Account object with email and credentials
|
||||||
@@ -378,4 +456,7 @@ export class AccountManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Re-export CooldownReason for use by handlers
|
||||||
|
export { CooldownReason };
|
||||||
|
|
||||||
export default AccountManager;
|
export default AccountManager;
|
||||||
|
|||||||
@@ -133,6 +133,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
|
|||||||
actualResetMs: actualResetMs // Original duration from API
|
actualResetMs: actualResetMs // Original duration from API
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Track consecutive failures for progressive backoff (matches opencode-antigravity-auth)
|
||||||
|
account.consecutiveFailures = (account.consecutiveFailures || 0) + 1;
|
||||||
|
|
||||||
// Log appropriately based on duration
|
// Log appropriately based on duration
|
||||||
if (actualResetMs > DEFAULT_COOLDOWN_MS) {
|
if (actualResetMs > DEFAULT_COOLDOWN_MS) {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
@@ -235,3 +238,132 @@ export function getRateLimitInfo(accounts, email, modelId) {
|
|||||||
waitMs
|
waitMs
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the consecutive failure count for an account
|
||||||
|
* Used for progressive backoff calculation (matches opencode-antigravity-auth)
|
||||||
|
*
|
||||||
|
* @param {Array} accounts - Array of account objects
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @returns {number} Number of consecutive failures
|
||||||
|
*/
|
||||||
|
export function getConsecutiveFailures(accounts, email) {
|
||||||
|
const account = accounts.find(a => a.email === email);
|
||||||
|
return account?.consecutiveFailures || 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the consecutive failure count for an account
|
||||||
|
* Called on successful request (matches opencode-antigravity-auth)
|
||||||
|
*
|
||||||
|
* @param {Array} accounts - Array of account objects
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @returns {boolean} True if account was found and reset
|
||||||
|
*/
|
||||||
|
export function resetConsecutiveFailures(accounts, email) {
|
||||||
|
const account = accounts.find(a => a.email === email);
|
||||||
|
if (!account) return false;
|
||||||
|
account.consecutiveFailures = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the consecutive failure count for an account WITHOUT marking as rate limited
|
||||||
|
* Used for quick retries where we want to track failures but not skip the account
|
||||||
|
* (matches opencode-antigravity-auth behavior of always incrementing on 429)
|
||||||
|
*
|
||||||
|
* @param {Array} accounts - Array of account objects
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @returns {number} New consecutive failure count
|
||||||
|
*/
|
||||||
|
export function incrementConsecutiveFailures(accounts, email) {
|
||||||
|
const account = accounts.find(a => a.email === email);
|
||||||
|
if (!account) return 0;
|
||||||
|
account.consecutiveFailures = (account.consecutiveFailures || 0) + 1;
|
||||||
|
return account.consecutiveFailures;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cooldown Mechanism (matches opencode-antigravity-auth)
|
||||||
|
// Separate from rate limits - used for temporary backoff after failures
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cooldown reasons for debugging/logging
|
||||||
|
*/
|
||||||
|
export const CooldownReason = {
|
||||||
|
RATE_LIMIT: 'rate_limit',
|
||||||
|
AUTH_FAILURE: 'auth_failure',
|
||||||
|
CONSECUTIVE_FAILURES: 'consecutive_failures',
|
||||||
|
SERVER_ERROR: 'server_error'
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark an account as cooling down for a specified duration
|
||||||
|
* Used for temporary backoff separate from rate limits
|
||||||
|
*
|
||||||
|
* @param {Array} accounts - Array of account objects
|
||||||
|
* @param {string} email - Email of the account
|
||||||
|
* @param {number} cooldownMs - Duration of cooldown in milliseconds
|
||||||
|
* @param {string} [reason] - Reason for the cooldown
|
||||||
|
* @returns {boolean} True if account was found and marked
|
||||||
|
*/
|
||||||
|
export function markAccountCoolingDown(accounts, email, cooldownMs, reason = CooldownReason.RATE_LIMIT) {
|
||||||
|
const account = accounts.find(a => a.email === email);
|
||||||
|
if (!account) return false;
|
||||||
|
|
||||||
|
account.coolingDownUntil = Date.now() + cooldownMs;
|
||||||
|
account.cooldownReason = reason;
|
||||||
|
|
||||||
|
logger.debug(`[AccountManager] Account ${email} cooling down for ${formatDuration(cooldownMs)} (reason: ${reason})`);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if an account is currently cooling down
|
||||||
|
* Automatically clears expired cooldowns
|
||||||
|
*
|
||||||
|
* @param {Object} account - Account object
|
||||||
|
* @returns {boolean} True if account is cooling down
|
||||||
|
*/
|
||||||
|
export function isAccountCoolingDown(account) {
|
||||||
|
if (!account || account.coolingDownUntil === undefined) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = Date.now();
|
||||||
|
if (now >= account.coolingDownUntil) {
|
||||||
|
// Cooldown expired - clear it
|
||||||
|
clearAccountCooldown(account);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear the cooldown for an account
|
||||||
|
*
|
||||||
|
* @param {Object} account - Account object
|
||||||
|
*/
|
||||||
|
export function clearAccountCooldown(account) {
|
||||||
|
if (account) {
|
||||||
|
delete account.coolingDownUntil;
|
||||||
|
delete account.cooldownReason;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get time remaining until cooldown expires for an account
|
||||||
|
*
|
||||||
|
* @param {Object} account - Account object
|
||||||
|
* @returns {number} Milliseconds until cooldown expires, 0 if not cooling down
|
||||||
|
*/
|
||||||
|
export function getCooldownRemaining(account) {
|
||||||
|
if (!account || account.coolingDownUntil === undefined) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const remaining = account.coolingDownUntil - Date.now();
|
||||||
|
return remaining > 0 ? remaining : 0;
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@
|
|||||||
* All strategies must implement the selectAccount method.
|
* All strategies must implement the selectAccount method.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { isAccountCoolingDown } from '../rate-limits.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} SelectionResult
|
* @typedef {Object} SelectionResult
|
||||||
* @property {Object|null} account - The selected account or null if none available
|
* @property {Object|null} account - The selected account or null if none available
|
||||||
@@ -77,6 +79,9 @@ export class BaseStrategy {
|
|||||||
// Skip disabled accounts
|
// Skip disabled accounts
|
||||||
if (account.enabled === false) return false;
|
if (account.enabled === false) return false;
|
||||||
|
|
||||||
|
// Check if account is cooling down (matches opencode-antigravity-auth)
|
||||||
|
if (isAccountCoolingDown(account)) return false;
|
||||||
|
|
||||||
// Check model-specific rate limit
|
// Check model-specific rate limit
|
||||||
if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
|
if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) {
|
||||||
const limit = account.modelRateLimits[modelId];
|
const limit = account.modelRateLimits[modelId];
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get candidates that pass all filters
|
// Get candidates that pass all filters
|
||||||
const candidates = this.#getCandidates(accounts, modelId);
|
const { candidates, fallbackLevel } = this.#getCandidates(accounts, modelId);
|
||||||
|
|
||||||
if (candidates.length === 0) {
|
if (candidates.length === 0) {
|
||||||
// Diagnose why no candidates are available and compute wait time
|
// Diagnose why no candidates are available and compute wait time
|
||||||
@@ -87,16 +87,30 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
const best = scored[0];
|
const best = scored[0];
|
||||||
best.account.lastUsed = Date.now();
|
best.account.lastUsed = Date.now();
|
||||||
|
|
||||||
// Consume a token from the bucket
|
// Consume a token from the bucket (unless in lastResort mode where we bypassed token check)
|
||||||
|
if (fallbackLevel !== 'lastResort') {
|
||||||
this.#tokenBucketTracker.consume(best.account.email);
|
this.#tokenBucketTracker.consume(best.account.email);
|
||||||
|
}
|
||||||
|
|
||||||
if (onSave) onSave();
|
if (onSave) onSave();
|
||||||
|
|
||||||
|
// Calculate throttle wait time based on fallback level
|
||||||
|
// This prevents overwhelming the API when all accounts are stressed
|
||||||
|
let waitMs = 0;
|
||||||
|
if (fallbackLevel === 'lastResort') {
|
||||||
|
// All accounts exhausted - add significant delay to allow rate limits to clear
|
||||||
|
waitMs = 500;
|
||||||
|
} else if (fallbackLevel === 'emergency') {
|
||||||
|
// All accounts unhealthy - add moderate delay
|
||||||
|
waitMs = 250;
|
||||||
|
}
|
||||||
|
|
||||||
const position = best.index + 1;
|
const position = best.index + 1;
|
||||||
const total = accounts.length;
|
const total = accounts.length;
|
||||||
logger.info(`[HybridStrategy] Using account: ${best.account.email} (${position}/${total}, score: ${best.score.toFixed(1)})`);
|
const fallbackInfo = fallbackLevel !== 'normal' ? `, fallback: ${fallbackLevel}` : '';
|
||||||
|
logger.info(`[HybridStrategy] Using account: ${best.account.email} (${position}/${total}, score: ${best.score.toFixed(1)}${fallbackInfo})`);
|
||||||
|
|
||||||
return { account: best.account, index: best.index, waitMs: 0 };
|
return { account: best.account, index: best.index, waitMs };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -131,6 +145,8 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
/**
|
/**
|
||||||
* Get candidates that pass all filters
|
* Get candidates that pass all filters
|
||||||
* @private
|
* @private
|
||||||
|
* @returns {{candidates: Array, fallbackLevel: string}} Candidates and fallback level used
|
||||||
|
* fallbackLevel: 'normal' | 'quota' | 'emergency' | 'lastResort'
|
||||||
*/
|
*/
|
||||||
#getCandidates(accounts, modelId) {
|
#getCandidates(accounts, modelId) {
|
||||||
const candidates = accounts
|
const candidates = accounts
|
||||||
@@ -160,9 +176,12 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (candidates.length > 0) {
|
||||||
|
return { candidates, fallbackLevel: 'normal' };
|
||||||
|
}
|
||||||
|
|
||||||
// If no candidates after quota filter, fall back to all usable accounts
|
// If no candidates after quota filter, fall back to all usable accounts
|
||||||
// (better to use critical quota than fail entirely)
|
// (better to use critical quota than fail entirely)
|
||||||
if (candidates.length === 0) {
|
|
||||||
const fallback = accounts
|
const fallback = accounts
|
||||||
.map((account, index) => ({ account, index }))
|
.map((account, index) => ({ account, index }))
|
||||||
.filter(({ account }) => {
|
.filter(({ account }) => {
|
||||||
@@ -173,11 +192,40 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
});
|
});
|
||||||
if (fallback.length > 0) {
|
if (fallback.length > 0) {
|
||||||
logger.warn('[HybridStrategy] All accounts have critical quota, using fallback');
|
logger.warn('[HybridStrategy] All accounts have critical quota, using fallback');
|
||||||
return fallback;
|
return { candidates: fallback, fallbackLevel: 'quota' };
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return candidates;
|
// Emergency fallback: bypass health check when ALL accounts are unhealthy
|
||||||
|
// This prevents "Max retries exceeded" when health scores are too low
|
||||||
|
const emergency = accounts
|
||||||
|
.map((account, index) => ({ account, index }))
|
||||||
|
.filter(({ account }) => {
|
||||||
|
if (!this.isAccountUsable(account, modelId)) return false;
|
||||||
|
if (!this.#tokenBucketTracker.hasTokens(account.email)) return false;
|
||||||
|
// Skip health check - use "least bad" account
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
if (emergency.length > 0) {
|
||||||
|
logger.warn('[HybridStrategy] EMERGENCY: All accounts unhealthy, using least bad account');
|
||||||
|
return { candidates: emergency, fallbackLevel: 'emergency' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Last resort: bypass BOTH health AND token bucket checks
|
||||||
|
// Only check basic usability (not rate-limited, not disabled)
|
||||||
|
const lastResort = accounts
|
||||||
|
.map((account, index) => ({ account, index }))
|
||||||
|
.filter(({ account }) => {
|
||||||
|
// Only check if account is usable (not rate-limited, not disabled)
|
||||||
|
if (!this.isAccountUsable(account, modelId)) return false;
|
||||||
|
// Skip health and token bucket checks entirely
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
if (lastResort.length > 0) {
|
||||||
|
logger.warn('[HybridStrategy] LAST RESORT: All accounts exhausted, using any usable account');
|
||||||
|
return { candidates: lastResort, fallbackLevel: 'lastResort' };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { candidates: [], fallbackLevel: 'normal' };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -202,11 +250,11 @@ export class HybridStrategy extends BaseStrategy {
|
|||||||
const quotaComponent = quotaScore * this.#weights.quota;
|
const quotaComponent = quotaScore * this.#weights.quota;
|
||||||
|
|
||||||
// LRU component (older = higher score)
|
// LRU component (older = higher score)
|
||||||
// Use time since last use, capped at 1 hour for scoring
|
// Use time since last use in seconds, capped at 1 hour (matches opencode-antigravity-auth)
|
||||||
const lastUsed = account.lastUsed || 0;
|
const lastUsed = account.lastUsed || 0;
|
||||||
const timeSinceLastUse = Math.min(Date.now() - lastUsed, 3600000); // Cap at 1 hour
|
const timeSinceLastUse = Math.min(Date.now() - lastUsed, 3600000); // Cap at 1 hour
|
||||||
const lruMinutes = timeSinceLastUse / 60000;
|
const lruSeconds = timeSinceLastUse / 1000;
|
||||||
const lruComponent = lruMinutes * this.#weights.lru;
|
const lruComponent = lruSeconds * this.#weights.lru; // 0-3600 * 0.1 = 0-360 max
|
||||||
|
|
||||||
return healthComponent + tokenComponent + quotaComponent + lruComponent;
|
return healthComponent + tokenComponent + quotaComponent + lruComponent;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ const DEFAULT_CONFIG = {
|
|||||||
successReward: 1, // Points on successful request
|
successReward: 1, // Points on successful request
|
||||||
rateLimitPenalty: -10, // Points on rate limit
|
rateLimitPenalty: -10, // Points on rate limit
|
||||||
failurePenalty: -20, // Points on other failures
|
failurePenalty: -20, // Points on other failures
|
||||||
recoveryPerHour: 2, // Passive recovery rate
|
recoveryPerHour: 10, // Passive recovery rate (increased from 2 for faster recovery)
|
||||||
minUsable: 50, // Minimum score to be selected
|
minUsable: 50, // Minimum score to be selected
|
||||||
maxScore: 100 // Maximum score cap
|
maxScore: 100 // Maximum score cap
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -11,63 +11,92 @@ import {
|
|||||||
MAX_WAIT_BEFORE_ERROR_MS,
|
MAX_WAIT_BEFORE_ERROR_MS,
|
||||||
DEFAULT_COOLDOWN_MS,
|
DEFAULT_COOLDOWN_MS,
|
||||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||||
|
RATE_LIMIT_STATE_RESET_MS,
|
||||||
|
FIRST_RETRY_DELAY_MS,
|
||||||
|
SWITCH_ACCOUNT_DELAY_MS,
|
||||||
MAX_CONSECUTIVE_FAILURES,
|
MAX_CONSECUTIVE_FAILURES,
|
||||||
EXTENDED_COOLDOWN_MS,
|
EXTENDED_COOLDOWN_MS,
|
||||||
CAPACITY_RETRY_DELAY_MS,
|
CAPACITY_BACKOFF_TIERS_MS,
|
||||||
MAX_CAPACITY_RETRIES,
|
MAX_CAPACITY_RETRIES,
|
||||||
|
BACKOFF_BY_ERROR_TYPE,
|
||||||
|
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
|
||||||
|
MIN_BACKOFF_MS,
|
||||||
isThinkingModel
|
isThinkingModel
|
||||||
} from '../constants.js';
|
} from '../constants.js';
|
||||||
import { convertGoogleToAnthropic } from '../format/index.js';
|
import { convertGoogleToAnthropic } from '../format/index.js';
|
||||||
import { isRateLimitError, isAuthError } from '../errors.js';
|
import { isRateLimitError, isAuthError } from '../errors.js';
|
||||||
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
||||||
import { logger } from '../utils/logger.js';
|
import { logger } from '../utils/logger.js';
|
||||||
import { parseResetTime } from './rate-limit-parser.js';
|
import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
|
||||||
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
||||||
import { parseThinkingSSEResponse } from './sse-parser.js';
|
import { parseThinkingSSEResponse } from './sse-parser.js';
|
||||||
import { getFallbackModel } from '../fallback-config.js';
|
import { getFallbackModel } from '../fallback-config.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits
|
* Rate limit deduplication - prevents thundering herd on concurrent rate limits.
|
||||||
* Tracks last rate limit timestamp per model to skip duplicate retries
|
* Tracks rate limit state per account+model including consecutive429 count and timestamps.
|
||||||
*/
|
*/
|
||||||
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
|
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if we should skip retry due to recent rate limit on this model
|
* Get deduplication key for rate limit tracking
|
||||||
|
* @param {string} email - Account email
|
||||||
* @param {string} model - Model ID
|
* @param {string} model - Model ID
|
||||||
* @returns {boolean} True if retry should be skipped (within dedup window)
|
* @returns {string} Dedup key
|
||||||
*/
|
*/
|
||||||
function shouldSkipRetryDueToDedup(model) {
|
function getDedupKey(email, model) {
|
||||||
const lastTimestamp = lastRateLimitTimestamps.get(model);
|
return `${email}:${model}`;
|
||||||
if (!lastTimestamp) return false;
|
}
|
||||||
|
|
||||||
const elapsed = Date.now() - lastTimestamp;
|
/**
|
||||||
if (elapsed < RATE_LIMIT_DEDUP_WINDOW_MS) {
|
* Get rate limit backoff with deduplication and exponential backoff (matches opencode-antigravity-auth)
|
||||||
logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${elapsed}ms ago), skipping retry`);
|
* @param {string} email - Account email
|
||||||
return true;
|
* @param {string} model - Model ID
|
||||||
|
* @param {number|null} serverRetryAfterMs - Server-provided retry time
|
||||||
|
* @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
|
||||||
|
*/
|
||||||
|
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
|
||||||
|
const now = Date.now();
|
||||||
|
const stateKey = getDedupKey(email, model);
|
||||||
|
const previous = rateLimitStateByAccountModel.get(stateKey);
|
||||||
|
|
||||||
|
// Check if within dedup window - return duplicate status
|
||||||
|
if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
|
||||||
|
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
|
||||||
|
const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000);
|
||||||
|
logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
|
||||||
|
return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true };
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
// Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
|
||||||
|
const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
|
||||||
|
? previous.consecutive429 + 1
|
||||||
|
: 1;
|
||||||
|
|
||||||
|
// Update state
|
||||||
|
rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
|
||||||
|
|
||||||
|
// Calculate exponential backoff
|
||||||
|
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
|
||||||
|
const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000);
|
||||||
|
|
||||||
|
logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
|
||||||
|
return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record rate limit timestamp for deduplication
|
* Clear rate limit state after successful request
|
||||||
|
* @param {string} email - Account email
|
||||||
* @param {string} model - Model ID
|
* @param {string} model - Model ID
|
||||||
*/
|
*/
|
||||||
function recordRateLimitTimestamp(model) {
|
function clearRateLimitState(email, model) {
|
||||||
lastRateLimitTimestamps.set(model, Date.now());
|
const key = getDedupKey(email, model);
|
||||||
|
rateLimitStateByAccountModel.delete(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clear rate limit timestamp after successful retry
|
* Detect permanent authentication failures that require re-authentication.
|
||||||
* @param {string} model - Model ID
|
* These should mark the account as invalid rather than just clearing cache.
|
||||||
*/
|
|
||||||
function clearRateLimitTimestamp(model) {
|
|
||||||
lastRateLimitTimestamps.delete(model);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gap 3: Detect permanent authentication failures that require re-authentication
|
|
||||||
* These should mark the account as invalid rather than just clearing cache
|
|
||||||
* @param {string} errorText - Error message from API
|
* @param {string} errorText - Error message from API
|
||||||
* @returns {boolean} True if permanent auth failure
|
* @returns {boolean} True if permanent auth failure
|
||||||
*/
|
*/
|
||||||
@@ -82,8 +111,8 @@ function isPermanentAuthFailure(errorText) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gap 4: Detect if 429 error is due to model capacity (not user quota)
|
* Detect if 429 error is due to model capacity (not user quota).
|
||||||
* Capacity issues should retry on same account with shorter delay
|
* Capacity issues should retry on same account with shorter delay.
|
||||||
* @param {string} errorText - Error message from API
|
* @param {string} errorText - Error message from API
|
||||||
* @returns {boolean} True if capacity exhausted (not quota)
|
* @returns {boolean} True if capacity exhausted (not quota)
|
||||||
*/
|
*/
|
||||||
@@ -95,16 +124,47 @@ function isModelCapacityExhausted(errorText) {
|
|||||||
lower.includes('service temporarily unavailable');
|
lower.includes('service temporarily unavailable');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Periodically clean up stale dedup timestamps (every 60 seconds)
|
// Periodically clean up stale rate limit state (every 60 seconds)
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
const cutoff = Date.now() - 60000; // 1 minute
|
const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
|
||||||
for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
|
for (const [key, state] of rateLimitStateByAccountModel.entries()) {
|
||||||
if (timestamp < cutoff) {
|
if (state.lastAt < cutoff) {
|
||||||
lastRateLimitTimestamps.delete(model);
|
rateLimitStateByAccountModel.delete(key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate smart backoff based on error type (matches opencode-antigravity-auth)
|
||||||
|
* @param {string} errorText - Error message
|
||||||
|
* @param {number|null} serverResetMs - Reset time from server
|
||||||
|
* @param {number} consecutiveFailures - Number of consecutive failures
|
||||||
|
* @returns {number} Backoff time in milliseconds
|
||||||
|
*/
|
||||||
|
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
|
||||||
|
// If server provides a reset time, use it (with minimum floor to prevent loops)
|
||||||
|
if (serverResetMs && serverResetMs > 0) {
|
||||||
|
return Math.max(serverResetMs, MIN_BACKOFF_MS);
|
||||||
|
}
|
||||||
|
|
||||||
|
const reason = parseRateLimitReason(errorText);
|
||||||
|
|
||||||
|
switch (reason) {
|
||||||
|
case 'QUOTA_EXHAUSTED':
|
||||||
|
// Progressive backoff: [60s, 5m, 30m, 2h]
|
||||||
|
const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
|
||||||
|
case 'RATE_LIMIT_EXCEEDED':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
|
||||||
|
case 'MODEL_CAPACITY_EXHAUSTED':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
|
||||||
|
case 'SERVER_ERROR':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
|
||||||
|
default:
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send a non-streaming request to Cloud Code with multi-account support
|
* Send a non-streaming request to Cloud Code with multi-account support
|
||||||
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
|
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
|
||||||
@@ -174,7 +234,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
// Select account using configured strategy
|
// Select account using configured strategy
|
||||||
const { account, waitMs } = accountManager.selectAccount(model);
|
const { account, waitMs } = accountManager.selectAccount(model);
|
||||||
|
|
||||||
// If strategy returns a wait time, sleep and retry
|
// If strategy returns a wait time without an account, sleep and retry
|
||||||
if (!account && waitMs > 0) {
|
if (!account && waitMs > 0) {
|
||||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
||||||
await sleep(waitMs + 500);
|
await sleep(waitMs + 500);
|
||||||
@@ -182,6 +242,13 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If strategy returns an account with throttle wait (fallback mode), apply delay
|
||||||
|
// This prevents overwhelming the API when using emergency/lastResort fallbacks
|
||||||
|
if (account && waitMs > 0) {
|
||||||
|
logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
}
|
||||||
|
|
||||||
if (!account) {
|
if (!account) {
|
||||||
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
|
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
|
||||||
continue;
|
continue;
|
||||||
@@ -197,8 +264,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
|
|
||||||
// Try each endpoint with index-based loop for capacity retry support
|
// Try each endpoint with index-based loop for capacity retry support
|
||||||
let lastError = null;
|
let lastError = null;
|
||||||
let retriedOnce = false; // Track if we've already retried for short rate limit
|
let capacityRetryCount = 0;
|
||||||
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
|
|
||||||
let endpointIndex = 0;
|
let endpointIndex = 0;
|
||||||
|
|
||||||
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
||||||
@@ -219,7 +285,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
|
logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||||
|
|
||||||
if (response.status === 401) {
|
if (response.status === 401) {
|
||||||
// Gap 3: Check for permanent auth failures
|
// Check for permanent auth failures
|
||||||
if (isPermanentAuthFailure(errorText)) {
|
if (isPermanentAuthFailure(errorText)) {
|
||||||
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
||||||
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
||||||
@@ -236,12 +302,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
|
|
||||||
if (response.status === 429) {
|
if (response.status === 429) {
|
||||||
const resetMs = parseResetTime(response, errorText);
|
const resetMs = parseResetTime(response, errorText);
|
||||||
|
const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
|
||||||
|
|
||||||
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
|
// Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
|
||||||
if (isModelCapacityExhausted(errorText)) {
|
if (isModelCapacityExhausted(errorText)) {
|
||||||
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||||
|
// Progressive capacity backoff tiers
|
||||||
|
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
|
||||||
capacityRetryCount++;
|
capacityRetryCount++;
|
||||||
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
|
// Track failures for progressive backoff escalation (matches opencode-antigravity-auth)
|
||||||
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||||
await sleep(waitMs);
|
await sleep(waitMs);
|
||||||
// Don't increment endpointIndex - retry same endpoint
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
@@ -251,39 +322,80 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gap 1: Check deduplication window to prevent thundering herd
|
// Get rate limit backoff with exponential backoff and state reset
|
||||||
if (shouldSkipRetryDueToDedup(model)) {
|
const backoff = getRateLimitBackoff(account.email, model, resetMs);
|
||||||
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
|
|
||||||
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
|
|
||||||
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decision: wait and retry OR switch account
|
// For very short rate limits (< 1 second), always wait and retry
|
||||||
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
// Switching accounts won't help when all accounts have per-second rate limits
|
||||||
// Long-term quota exhaustion (> 10s) - switch to next account
|
if (resetMs !== null && resetMs < 1000) {
|
||||||
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
|
const waitMs = resetMs;
|
||||||
accountManager.markRateLimited(account.email, resetMs, model);
|
logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
|
||||||
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
|
||||||
} else {
|
|
||||||
// Short-term rate limit (<= 10s) - wait and retry once
|
|
||||||
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
|
|
||||||
|
|
||||||
if (!retriedOnce) {
|
|
||||||
retriedOnce = true;
|
|
||||||
recordRateLimitTimestamp(model); // Gap 1: Record before retry
|
|
||||||
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
|
||||||
await sleep(waitMs);
|
await sleep(waitMs);
|
||||||
// Don't increment endpointIndex - retry same endpoint
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
continue;
|
continue;
|
||||||
} else {
|
|
||||||
// Already retried once, mark and switch
|
|
||||||
accountManager.markRateLimited(account.email, waitMs, model);
|
|
||||||
throw new Error(`RATE_LIMITED: ${errorText}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If within dedup window AND reset time is >= 1s, switch account
|
||||||
|
if (backoff.isDuplicate) {
|
||||||
|
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
|
||||||
|
logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
|
||||||
|
accountManager.markRateLimited(account.email, smartBackoffMs, model);
|
||||||
|
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate smart backoff based on error type
|
||||||
|
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
|
||||||
|
|
||||||
|
// Decision: wait and retry OR switch account
|
||||||
|
// First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
|
||||||
|
if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
|
||||||
|
// Quick 1s retry on first 429 (matches opencode-antigravity-auth)
|
||||||
|
const waitMs = backoff.delayMs;
|
||||||
|
// markRateLimited already increments consecutiveFailures internally
|
||||||
|
// This prevents concurrent retry storms and ensures progressive backoff escalation
|
||||||
|
accountManager.markRateLimited(account.email, waitMs, model);
|
||||||
|
logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
|
} else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
|
||||||
|
// Long-term quota exhaustion (> 10s) - wait SWITCH_ACCOUNT_DELAY_MS then switch
|
||||||
|
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
|
||||||
|
await sleep(SWITCH_ACCOUNT_DELAY_MS);
|
||||||
|
accountManager.markRateLimited(account.email, smartBackoffMs, model);
|
||||||
|
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
||||||
|
} else {
|
||||||
|
// Short-term rate limit but not first attempt - use exponential backoff delay
|
||||||
|
const waitMs = backoff.delayMs;
|
||||||
|
// markRateLimited already increments consecutiveFailures internally
|
||||||
|
accountManager.markRateLimited(account.email, waitMs, model);
|
||||||
|
logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.status >= 400) {
|
if (response.status >= 400) {
|
||||||
|
// Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
|
||||||
|
if (response.status === 503 && isModelCapacityExhausted(errorText)) {
|
||||||
|
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||||
|
// Progressive capacity backoff tiers (same as 429 capacity handling)
|
||||||
|
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
|
||||||
|
capacityRetryCount++;
|
||||||
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Max capacity retries exceeded - switch account
|
||||||
|
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
|
||||||
|
accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
|
||||||
|
throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
|
||||||
|
}
|
||||||
|
|
||||||
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
||||||
// Try next endpoint for 403/404/5xx errors (matches opencode-antigravity-auth behavior)
|
// Try next endpoint for 403/404/5xx errors (matches opencode-antigravity-auth behavior)
|
||||||
if (response.status === 403 || response.status === 404) {
|
if (response.status === 403 || response.status === 404) {
|
||||||
@@ -300,8 +412,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
// For thinking models, parse SSE and accumulate all parts
|
// For thinking models, parse SSE and accumulate all parts
|
||||||
if (isThinking) {
|
if (isThinking) {
|
||||||
const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
|
const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
|
||||||
// Gap 1: Clear timestamp on success
|
// Clear rate limit state on success
|
||||||
clearRateLimitTimestamp(model);
|
clearRateLimitState(account.email, model);
|
||||||
accountManager.notifySuccess(account, model);
|
accountManager.notifySuccess(account, model);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -309,8 +421,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
// Non-thinking models use regular JSON
|
// Non-thinking models use regular JSON
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
logger.debug('[CloudCode] Response received');
|
logger.debug('[CloudCode] Response received');
|
||||||
// Gap 1: Clear timestamp on success
|
// Clear rate limit state on success
|
||||||
clearRateLimitTimestamp(model);
|
clearRateLimitState(account.email, model);
|
||||||
accountManager.notifySuccess(account, model);
|
accountManager.notifySuccess(account, model);
|
||||||
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
||||||
|
|
||||||
@@ -350,13 +462,15 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||||
accountManager.notifyFailure(account, model);
|
accountManager.notifyFailure(account, model);
|
||||||
|
|
||||||
// Gap 2: Check consecutive failures for extended cooldown
|
// Track 5xx errors for extended cooldown
|
||||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
// Note: markRateLimited already increments consecutiveFailures internally
|
||||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
const currentFailures = accountManager.getConsecutiveFailures(account.email);
|
||||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||||
} else {
|
} else {
|
||||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -364,13 +478,15 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|||||||
if (isNetworkError(error)) {
|
if (isNetworkError(error)) {
|
||||||
accountManager.notifyFailure(account, model);
|
accountManager.notifyFailure(account, model);
|
||||||
|
|
||||||
// Gap 2: Check consecutive failures for extended cooldown
|
// Track network errors for extended cooldown
|
||||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
// Note: markRateLimited already increments consecutiveFailures internally
|
||||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
const currentFailures = accountManager.getConsecutiveFailures(account.email);
|
||||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||||
} else {
|
} else {
|
||||||
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.warn(`[CloudCode] Network error for ${account.email} (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
|
||||||
}
|
}
|
||||||
await sleep(1000);
|
await sleep(1000);
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@@ -167,15 +167,69 @@ export function parseResetTime(responseOrError, errorText = '') {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SANITY CHECK: Enforce strict minimums for found rate limits
|
// SANITY CHECK: Handle very small or negative reset times
|
||||||
// If we found a reset time, but it's very small (e.g. < 1s) or negative,
|
// For sub-second rate limits (common with per-second quotas), add a small buffer
|
||||||
// explicitly bump it up to avoid "Available in 0s" loops.
|
// For negative or zero, use a reasonable minimum
|
||||||
if (resetMs !== null) {
|
if (resetMs !== null) {
|
||||||
if (resetMs < 1000) {
|
if (resetMs <= 0) {
|
||||||
logger.debug(`[CloudCode] Reset time too small (${resetMs}ms), enforcing 2s buffer`);
|
logger.debug(`[CloudCode] Reset time invalid (${resetMs}ms), using 500ms default`);
|
||||||
resetMs = 2000;
|
resetMs = 500;
|
||||||
|
} else if (resetMs < 500) {
|
||||||
|
// Very short reset - add 200ms buffer for network latency
|
||||||
|
logger.debug(`[CloudCode] Short reset time (${resetMs}ms), adding 200ms buffer`);
|
||||||
|
resetMs = resetMs + 200;
|
||||||
}
|
}
|
||||||
|
// Note: No longer enforcing 2s minimum - this was causing cascading failures
|
||||||
|
// when all accounts had short rate limits simultaneously
|
||||||
}
|
}
|
||||||
|
|
||||||
return resetMs;
|
return resetMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse the rate limit reason from error text
|
||||||
|
* Used for smart backoff by error type (matches opencode-antigravity-auth)
|
||||||
|
*
|
||||||
|
* @param {string} errorText - Error message/body text
|
||||||
|
* @returns {'RATE_LIMIT_EXCEEDED' | 'QUOTA_EXHAUSTED' | 'MODEL_CAPACITY_EXHAUSTED' | 'SERVER_ERROR' | 'UNKNOWN'} Error reason
|
||||||
|
*/
|
||||||
|
export function parseRateLimitReason(errorText) {
|
||||||
|
const lower = (errorText || '').toLowerCase();
|
||||||
|
|
||||||
|
// Check for quota exhaustion (daily/hourly limits)
|
||||||
|
if (lower.includes('quota_exhausted') ||
|
||||||
|
lower.includes('quotaresetdelay') ||
|
||||||
|
lower.includes('quotaresettimestamp') ||
|
||||||
|
lower.includes('resource_exhausted') ||
|
||||||
|
lower.includes('daily limit') ||
|
||||||
|
lower.includes('quota exceeded')) {
|
||||||
|
return 'QUOTA_EXHAUSTED';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for model capacity issues (temporary, retry quickly)
|
||||||
|
if (lower.includes('model_capacity_exhausted') ||
|
||||||
|
lower.includes('capacity_exhausted') ||
|
||||||
|
lower.includes('model is currently overloaded') ||
|
||||||
|
lower.includes('service temporarily unavailable')) {
|
||||||
|
return 'MODEL_CAPACITY_EXHAUSTED';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for rate limiting (per-minute limits)
|
||||||
|
if (lower.includes('rate_limit_exceeded') ||
|
||||||
|
lower.includes('rate limit') ||
|
||||||
|
lower.includes('too many requests') ||
|
||||||
|
lower.includes('throttl')) {
|
||||||
|
return 'RATE_LIMIT_EXCEEDED';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for server errors
|
||||||
|
if (lower.includes('internal server error') ||
|
||||||
|
lower.includes('server error') ||
|
||||||
|
lower.includes('503') ||
|
||||||
|
lower.includes('502') ||
|
||||||
|
lower.includes('504')) {
|
||||||
|
return 'SERVER_ERROR';
|
||||||
|
}
|
||||||
|
|
||||||
|
return 'UNKNOWN';
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,61 +12,90 @@ import {
|
|||||||
MAX_WAIT_BEFORE_ERROR_MS,
|
MAX_WAIT_BEFORE_ERROR_MS,
|
||||||
DEFAULT_COOLDOWN_MS,
|
DEFAULT_COOLDOWN_MS,
|
||||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||||
|
RATE_LIMIT_STATE_RESET_MS,
|
||||||
|
FIRST_RETRY_DELAY_MS,
|
||||||
|
SWITCH_ACCOUNT_DELAY_MS,
|
||||||
MAX_CONSECUTIVE_FAILURES,
|
MAX_CONSECUTIVE_FAILURES,
|
||||||
EXTENDED_COOLDOWN_MS,
|
EXTENDED_COOLDOWN_MS,
|
||||||
CAPACITY_RETRY_DELAY_MS,
|
CAPACITY_BACKOFF_TIERS_MS,
|
||||||
MAX_CAPACITY_RETRIES
|
MAX_CAPACITY_RETRIES,
|
||||||
|
BACKOFF_BY_ERROR_TYPE,
|
||||||
|
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
|
||||||
|
MIN_BACKOFF_MS
|
||||||
} from '../constants.js';
|
} from '../constants.js';
|
||||||
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
|
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
|
||||||
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
||||||
import { logger } from '../utils/logger.js';
|
import { logger } from '../utils/logger.js';
|
||||||
import { parseResetTime } from './rate-limit-parser.js';
|
import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
|
||||||
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
||||||
import { streamSSEResponse } from './sse-streamer.js';
|
import { streamSSEResponse } from './sse-streamer.js';
|
||||||
import { getFallbackModel } from '../fallback-config.js';
|
import { getFallbackModel } from '../fallback-config.js';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits
|
* Rate limit deduplication - prevents thundering herd on concurrent rate limits.
|
||||||
* Tracks last rate limit timestamp per model to skip duplicate retries
|
* Tracks rate limit state per account+model including consecutive429 count and timestamps.
|
||||||
*/
|
*/
|
||||||
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
|
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if we should skip retry due to recent rate limit on this model
|
* Get deduplication key for rate limit tracking
|
||||||
|
* @param {string} email - Account email
|
||||||
* @param {string} model - Model ID
|
* @param {string} model - Model ID
|
||||||
* @returns {boolean} True if retry should be skipped (within dedup window)
|
* @returns {string} Dedup key
|
||||||
*/
|
*/
|
||||||
function shouldSkipRetryDueToDedup(model) {
|
function getDedupKey(email, model) {
|
||||||
const lastTimestamp = lastRateLimitTimestamps.get(model);
|
return `${email}:${model}`;
|
||||||
if (!lastTimestamp) return false;
|
}
|
||||||
|
|
||||||
const elapsed = Date.now() - lastTimestamp;
|
/**
|
||||||
if (elapsed < RATE_LIMIT_DEDUP_WINDOW_MS) {
|
* Get rate limit backoff with deduplication and exponential backoff (matches opencode-antigravity-auth)
|
||||||
logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${elapsed}ms ago), skipping retry`);
|
* @param {string} email - Account email
|
||||||
return true;
|
* @param {string} model - Model ID
|
||||||
|
* @param {number|null} serverRetryAfterMs - Server-provided retry time
|
||||||
|
* @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
|
||||||
|
*/
|
||||||
|
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
|
||||||
|
const now = Date.now();
|
||||||
|
const stateKey = getDedupKey(email, model);
|
||||||
|
const previous = rateLimitStateByAccountModel.get(stateKey);
|
||||||
|
|
||||||
|
// Check if within dedup window - return duplicate status
|
||||||
|
if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
|
||||||
|
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
|
||||||
|
const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000);
|
||||||
|
logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
|
||||||
|
return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true };
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
// Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
|
||||||
|
const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
|
||||||
|
? previous.consecutive429 + 1
|
||||||
|
: 1;
|
||||||
|
|
||||||
|
// Update state
|
||||||
|
rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
|
||||||
|
|
||||||
|
// Calculate exponential backoff
|
||||||
|
const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
|
||||||
|
const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000);
|
||||||
|
|
||||||
|
logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
|
||||||
|
return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record rate limit timestamp for deduplication
|
* Clear rate limit state after successful request
|
||||||
|
* @param {string} email - Account email
|
||||||
* @param {string} model - Model ID
|
* @param {string} model - Model ID
|
||||||
*/
|
*/
|
||||||
function recordRateLimitTimestamp(model) {
|
function clearRateLimitState(email, model) {
|
||||||
lastRateLimitTimestamps.set(model, Date.now());
|
const key = getDedupKey(email, model);
|
||||||
|
rateLimitStateByAccountModel.delete(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clear rate limit timestamp after successful retry
|
* Detect permanent authentication failures that require re-authentication.
|
||||||
* @param {string} model - Model ID
|
|
||||||
*/
|
|
||||||
function clearRateLimitTimestamp(model) {
|
|
||||||
lastRateLimitTimestamps.delete(model);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gap 3: Detect permanent authentication failures that require re-authentication
|
|
||||||
* @param {string} errorText - Error message from API
|
* @param {string} errorText - Error message from API
|
||||||
* @returns {boolean} True if permanent auth failure
|
* @returns {boolean} True if permanent auth failure
|
||||||
*/
|
*/
|
||||||
@@ -81,7 +110,7 @@ function isPermanentAuthFailure(errorText) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gap 4: Detect if 429 error is due to model capacity (not user quota)
|
* Detect if 429 error is due to model capacity (not user quota).
|
||||||
* @param {string} errorText - Error message from API
|
* @param {string} errorText - Error message from API
|
||||||
* @returns {boolean} True if capacity exhausted (not quota)
|
* @returns {boolean} True if capacity exhausted (not quota)
|
||||||
*/
|
*/
|
||||||
@@ -93,16 +122,47 @@ function isModelCapacityExhausted(errorText) {
|
|||||||
lower.includes('service temporarily unavailable');
|
lower.includes('service temporarily unavailable');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Periodically clean up stale dedup timestamps (every 60 seconds)
|
// Periodically clean up stale rate limit state (every 60 seconds)
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
const cutoff = Date.now() - 60000; // 1 minute
|
const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
|
||||||
for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
|
for (const [key, state] of rateLimitStateByAccountModel.entries()) {
|
||||||
if (timestamp < cutoff) {
|
if (state.lastAt < cutoff) {
|
||||||
lastRateLimitTimestamps.delete(model);
|
rateLimitStateByAccountModel.delete(key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate smart backoff based on error type (matches opencode-antigravity-auth)
|
||||||
|
* @param {string} errorText - Error message
|
||||||
|
* @param {number|null} serverResetMs - Reset time from server
|
||||||
|
* @param {number} consecutiveFailures - Number of consecutive failures
|
||||||
|
* @returns {number} Backoff time in milliseconds
|
||||||
|
*/
|
||||||
|
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
|
||||||
|
// If server provides a reset time, use it (with minimum floor to prevent loops)
|
||||||
|
if (serverResetMs && serverResetMs > 0) {
|
||||||
|
return Math.max(serverResetMs, MIN_BACKOFF_MS);
|
||||||
|
}
|
||||||
|
|
||||||
|
const reason = parseRateLimitReason(errorText);
|
||||||
|
|
||||||
|
switch (reason) {
|
||||||
|
case 'QUOTA_EXHAUSTED':
|
||||||
|
// Progressive backoff: [60s, 5m, 30m, 2h]
|
||||||
|
const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
|
||||||
|
case 'RATE_LIMIT_EXCEEDED':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
|
||||||
|
case 'MODEL_CAPACITY_EXHAUSTED':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
|
||||||
|
case 'SERVER_ERROR':
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
|
||||||
|
default:
|
||||||
|
return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send a streaming request to Cloud Code with multi-account support
|
* Send a streaming request to Cloud Code with multi-account support
|
||||||
* Streams events in real-time as they arrive from the server
|
* Streams events in real-time as they arrive from the server
|
||||||
@@ -172,7 +232,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
// Select account using configured strategy
|
// Select account using configured strategy
|
||||||
const { account, waitMs } = accountManager.selectAccount(model);
|
const { account, waitMs } = accountManager.selectAccount(model);
|
||||||
|
|
||||||
// If strategy returns a wait time, sleep and retry
|
// If strategy returns a wait time without an account, sleep and retry
|
||||||
if (!account && waitMs > 0) {
|
if (!account && waitMs > 0) {
|
||||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
||||||
await sleep(waitMs + 500);
|
await sleep(waitMs + 500);
|
||||||
@@ -180,6 +240,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If strategy returns an account with throttle wait (fallback mode), apply delay
|
||||||
|
// This prevents overwhelming the API when using emergency/lastResort fallbacks
|
||||||
|
if (account && waitMs > 0) {
|
||||||
|
logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
}
|
||||||
|
|
||||||
if (!account) {
|
if (!account) {
|
||||||
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
|
logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
|
||||||
continue;
|
continue;
|
||||||
@@ -195,8 +262,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
|
|
||||||
// Try each endpoint with index-based loop for capacity retry support
|
// Try each endpoint with index-based loop for capacity retry support
|
||||||
let lastError = null;
|
let lastError = null;
|
||||||
let retriedOnce = false; // Track if we've already retried for short rate limit
|
let capacityRetryCount = 0;
|
||||||
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
|
|
||||||
let endpointIndex = 0;
|
let endpointIndex = 0;
|
||||||
|
|
||||||
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
||||||
@@ -215,7 +281,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
|
logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||||
|
|
||||||
if (response.status === 401) {
|
if (response.status === 401) {
|
||||||
// Gap 3: Check for permanent auth failures
|
// Check for permanent auth failures
|
||||||
if (isPermanentAuthFailure(errorText)) {
|
if (isPermanentAuthFailure(errorText)) {
|
||||||
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
||||||
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
||||||
@@ -231,12 +297,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
|
|
||||||
if (response.status === 429) {
|
if (response.status === 429) {
|
||||||
const resetMs = parseResetTime(response, errorText);
|
const resetMs = parseResetTime(response, errorText);
|
||||||
|
const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
|
||||||
|
|
||||||
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
|
// Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
|
||||||
if (isModelCapacityExhausted(errorText)) {
|
if (isModelCapacityExhausted(errorText)) {
|
||||||
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||||
|
// Progressive capacity backoff tiers
|
||||||
|
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
|
||||||
capacityRetryCount++;
|
capacityRetryCount++;
|
||||||
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
|
// Track failures for progressive backoff escalation (matches opencode-antigravity-auth)
|
||||||
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||||
await sleep(waitMs);
|
await sleep(waitMs);
|
||||||
// Don't increment endpointIndex - retry same endpoint
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
@@ -246,36 +317,76 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gap 1: Check deduplication window to prevent thundering herd
|
// Get rate limit backoff with exponential backoff and state reset
|
||||||
if (shouldSkipRetryDueToDedup(model)) {
|
const backoff = getRateLimitBackoff(account.email, model, resetMs);
|
||||||
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
|
|
||||||
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
|
|
||||||
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decision: wait and retry OR switch account
|
// For very short rate limits (< 1 second), always wait and retry
|
||||||
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
// Switching accounts won't help when all accounts have per-second rate limits
|
||||||
// Long-term quota exhaustion (> 10s) - switch to next account
|
if (resetMs !== null && resetMs < 1000) {
|
||||||
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
|
const waitMs = resetMs;
|
||||||
accountManager.markRateLimited(account.email, resetMs, model);
|
logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
|
||||||
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
|
||||||
} else {
|
|
||||||
// Short-term rate limit (<= 10s) - wait and retry once
|
|
||||||
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
|
|
||||||
|
|
||||||
if (!retriedOnce) {
|
|
||||||
retriedOnce = true;
|
|
||||||
recordRateLimitTimestamp(model); // Gap 1: Record before retry
|
|
||||||
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
|
||||||
await sleep(waitMs);
|
await sleep(waitMs);
|
||||||
// Don't increment endpointIndex - retry same endpoint
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
continue;
|
continue;
|
||||||
} else {
|
}
|
||||||
// Already retried once, mark and switch
|
|
||||||
|
// If within dedup window AND reset time is >= 1s, switch account
|
||||||
|
if (backoff.isDuplicate) {
|
||||||
|
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
|
||||||
|
logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
|
||||||
|
accountManager.markRateLimited(account.email, smartBackoffMs, model);
|
||||||
|
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate smart backoff based on error type
|
||||||
|
const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
|
||||||
|
|
||||||
|
// Decision: wait and retry OR switch account
|
||||||
|
// First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
|
||||||
|
if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
|
||||||
|
// Quick 1s retry on first 429 (matches opencode-antigravity-auth)
|
||||||
|
const waitMs = backoff.delayMs;
|
||||||
|
// markRateLimited already increments consecutiveFailures internally
|
||||||
accountManager.markRateLimited(account.email, waitMs, model);
|
accountManager.markRateLimited(account.email, waitMs, model);
|
||||||
throw new Error(`RATE_LIMITED: ${errorText}`);
|
logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
|
} else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
|
||||||
|
// Long-term quota exhaustion (> 10s) - wait SWITCH_ACCOUNT_DELAY_MS then switch
|
||||||
|
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
|
||||||
|
await sleep(SWITCH_ACCOUNT_DELAY_MS);
|
||||||
|
accountManager.markRateLimited(account.email, smartBackoffMs, model);
|
||||||
|
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
||||||
|
} else {
|
||||||
|
// Short-term rate limit but not first attempt - use exponential backoff delay
|
||||||
|
const waitMs = backoff.delayMs;
|
||||||
|
// markRateLimited already increments consecutiveFailures internally
|
||||||
|
accountManager.markRateLimited(account.email, waitMs, model);
|
||||||
|
logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
|
||||||
|
if (response.status === 503 && isModelCapacityExhausted(errorText)) {
|
||||||
|
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||||
|
// Progressive capacity backoff tiers (same as 429 capacity handling)
|
||||||
|
const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
|
||||||
|
const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
|
||||||
|
capacityRetryCount++;
|
||||||
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||||
|
await sleep(waitMs);
|
||||||
|
// Don't increment endpointIndex - retry same endpoint
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Max capacity retries exceeded - switch account
|
||||||
|
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
|
||||||
|
accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
|
||||||
|
throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
||||||
@@ -299,8 +410,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
try {
|
try {
|
||||||
yield* streamSSEResponse(currentResponse, anthropicRequest.model);
|
yield* streamSSEResponse(currentResponse, anthropicRequest.model);
|
||||||
logger.debug('[CloudCode] Stream completed');
|
logger.debug('[CloudCode] Stream completed');
|
||||||
// Gap 1: Clear timestamp on success
|
// Clear rate limit state on success
|
||||||
clearRateLimitTimestamp(model);
|
clearRateLimitState(account.email, model);
|
||||||
accountManager.notifySuccess(account, model);
|
accountManager.notifySuccess(account, model);
|
||||||
return;
|
return;
|
||||||
} catch (streamError) {
|
} catch (streamError) {
|
||||||
@@ -409,13 +520,15 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||||
accountManager.notifyFailure(account, model);
|
accountManager.notifyFailure(account, model);
|
||||||
|
|
||||||
// Gap 2: Check consecutive failures for extended cooldown
|
// Track 5xx errors for extended cooldown
|
||||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
// Note: markRateLimited already increments consecutiveFailures internally
|
||||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
const currentFailures = accountManager.getConsecutiveFailures(account.email);
|
||||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||||
} else {
|
} else {
|
||||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -423,13 +536,15 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|||||||
if (isNetworkError(error)) {
|
if (isNetworkError(error)) {
|
||||||
accountManager.notifyFailure(account, model);
|
accountManager.notifyFailure(account, model);
|
||||||
|
|
||||||
// Gap 2: Check consecutive failures for extended cooldown
|
// Track network errors for extended cooldown
|
||||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
// Note: markRateLimited already increments consecutiveFailures internally
|
||||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
const currentFailures = accountManager.getConsecutiveFailures(account.email);
|
||||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
|
||||||
|
logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||||
} else {
|
} else {
|
||||||
logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
|
accountManager.incrementConsecutiveFailures(account.email);
|
||||||
|
logger.warn(`[CloudCode] Network error for ${account.email} (stream) (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
|
||||||
}
|
}
|
||||||
await sleep(1000);
|
await sleep(1000);
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@@ -16,6 +16,11 @@ const DEFAULT_CONFIG = {
|
|||||||
defaultCooldownMs: 10000, // 10 seconds
|
defaultCooldownMs: 10000, // 10 seconds
|
||||||
maxWaitBeforeErrorMs: 120000, // 2 minutes
|
maxWaitBeforeErrorMs: 120000, // 2 minutes
|
||||||
maxAccounts: 10, // Maximum number of accounts allowed
|
maxAccounts: 10, // Maximum number of accounts allowed
|
||||||
|
// Rate limit handling (matches opencode-antigravity-auth)
|
||||||
|
rateLimitDedupWindowMs: 2000, // 2 seconds - prevents concurrent retry storms
|
||||||
|
maxConsecutiveFailures: 3, // Before applying extended cooldown
|
||||||
|
extendedCooldownMs: 60000, // 1 minute extended cooldown
|
||||||
|
maxCapacityRetries: 5, // Max retries for capacity exhaustion
|
||||||
modelMapping: {},
|
modelMapping: {},
|
||||||
// Account selection strategy configuration
|
// Account selection strategy configuration
|
||||||
accountSelection: {
|
accountSelection: {
|
||||||
|
|||||||
@@ -103,16 +103,33 @@ export const MAX_ACCOUNTS = config?.maxAccounts || 10; // From config or 10
|
|||||||
// Rate limit wait thresholds
|
// Rate limit wait thresholds
|
||||||
export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs || 120000; // From config or 2 minutes
|
export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs || 120000; // From config or 2 minutes
|
||||||
|
|
||||||
// Gap 1: Retry deduplication - prevents thundering herd on concurrent rate limits
|
// Retry deduplication - prevents thundering herd on concurrent rate limits
|
||||||
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 5000; // 5 seconds
|
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 2000; // 2 seconds
|
||||||
|
export const RATE_LIMIT_STATE_RESET_MS = config?.rateLimitStateResetMs || 120000; // 2 minutes - reset consecutive429 after inactivity
|
||||||
|
export const FIRST_RETRY_DELAY_MS = config?.firstRetryDelayMs || 1000; // Quick 1s retry on first 429
|
||||||
|
export const SWITCH_ACCOUNT_DELAY_MS = config?.switchAccountDelayMs || 5000; // Delay before switching accounts
|
||||||
|
|
||||||
// Gap 2: Consecutive failure tracking - extended cooldown after repeated failures
|
// Consecutive failure tracking - extended cooldown after repeated failures
|
||||||
export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures || 3;
|
export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures || 3;
|
||||||
export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs || 60000; // 1 minute
|
export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs || 60000; // 1 minute
|
||||||
|
|
||||||
// Gap 4: Capacity exhaustion - shorter retry for model capacity issues (not quota)
|
// Capacity exhaustion - progressive backoff tiers for model capacity issues
|
||||||
export const CAPACITY_RETRY_DELAY_MS = config?.capacityRetryDelayMs || 2000; // 2 seconds
|
export const CAPACITY_BACKOFF_TIERS_MS = config?.capacityBackoffTiersMs || [5000, 10000, 20000, 30000, 60000];
|
||||||
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 3;
|
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 5;
|
||||||
|
|
||||||
|
// Smart backoff by error type
|
||||||
|
export const BACKOFF_BY_ERROR_TYPE = {
|
||||||
|
RATE_LIMIT_EXCEEDED: 30000, // 30 seconds
|
||||||
|
MODEL_CAPACITY_EXHAUSTED: 15000, // 15 seconds
|
||||||
|
SERVER_ERROR: 20000, // 20 seconds
|
||||||
|
UNKNOWN: 60000 // 1 minute
|
||||||
|
};
|
||||||
|
|
||||||
|
// Progressive backoff tiers for QUOTA_EXHAUSTED (60s, 5m, 30m, 2h)
|
||||||
|
export const QUOTA_EXHAUSTED_BACKOFF_TIERS_MS = [60000, 300000, 1800000, 7200000];
|
||||||
|
|
||||||
|
// Minimum backoff floor to prevent "Available in 0s" loops (matches opencode-antigravity-auth)
|
||||||
|
export const MIN_BACKOFF_MS = 2000;
|
||||||
|
|
||||||
// Thinking model constants
|
// Thinking model constants
|
||||||
export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length
|
export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length
|
||||||
@@ -258,10 +275,16 @@ export default {
|
|||||||
MAX_ACCOUNTS,
|
MAX_ACCOUNTS,
|
||||||
MAX_WAIT_BEFORE_ERROR_MS,
|
MAX_WAIT_BEFORE_ERROR_MS,
|
||||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||||
|
RATE_LIMIT_STATE_RESET_MS,
|
||||||
|
FIRST_RETRY_DELAY_MS,
|
||||||
|
SWITCH_ACCOUNT_DELAY_MS,
|
||||||
MAX_CONSECUTIVE_FAILURES,
|
MAX_CONSECUTIVE_FAILURES,
|
||||||
EXTENDED_COOLDOWN_MS,
|
EXTENDED_COOLDOWN_MS,
|
||||||
CAPACITY_RETRY_DELAY_MS,
|
CAPACITY_BACKOFF_TIERS_MS,
|
||||||
MAX_CAPACITY_RETRIES,
|
MAX_CAPACITY_RETRIES,
|
||||||
|
BACKOFF_BY_ERROR_TYPE,
|
||||||
|
QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
|
||||||
|
MIN_BACKOFF_MS,
|
||||||
MIN_SIGNATURE_LENGTH,
|
MIN_SIGNATURE_LENGTH,
|
||||||
GEMINI_MAX_OUTPUT_TOKENS,
|
GEMINI_MAX_OUTPUT_TOKENS,
|
||||||
GEMINI_SKIP_SIGNATURE,
|
GEMINI_SKIP_SIGNATURE,
|
||||||
|
|||||||
@@ -397,7 +397,7 @@ export function mountWebUI(app, dirname, accountManager) {
|
|||||||
*/
|
*/
|
||||||
app.post('/api/config', (req, res) => {
|
app.post('/api/config', (req, res) => {
|
||||||
try {
|
try {
|
||||||
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, maxAccounts, accountSelection, rateLimitDedupWindowMs, maxConsecutiveFailures, extendedCooldownMs, capacityRetryDelayMs, maxCapacityRetries } = req.body;
|
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, maxAccounts, accountSelection, rateLimitDedupWindowMs, maxConsecutiveFailures, extendedCooldownMs, maxCapacityRetries } = req.body;
|
||||||
|
|
||||||
// Only allow updating specific fields (security)
|
// Only allow updating specific fields (security)
|
||||||
const updates = {};
|
const updates = {};
|
||||||
@@ -435,9 +435,6 @@ export function mountWebUI(app, dirname, accountManager) {
|
|||||||
if (typeof extendedCooldownMs === 'number' && extendedCooldownMs >= 10000 && extendedCooldownMs <= 300000) {
|
if (typeof extendedCooldownMs === 'number' && extendedCooldownMs >= 10000 && extendedCooldownMs <= 300000) {
|
||||||
updates.extendedCooldownMs = extendedCooldownMs;
|
updates.extendedCooldownMs = extendedCooldownMs;
|
||||||
}
|
}
|
||||||
if (typeof capacityRetryDelayMs === 'number' && capacityRetryDelayMs >= 500 && capacityRetryDelayMs <= 10000) {
|
|
||||||
updates.capacityRetryDelayMs = capacityRetryDelayMs;
|
|
||||||
}
|
|
||||||
if (typeof maxCapacityRetries === 'number' && maxCapacityRetries >= 1 && maxCapacityRetries <= 10) {
|
if (typeof maxCapacityRetries === 'number' && maxCapacityRetries >= 1 && maxCapacityRetries <= 10) {
|
||||||
updates.maxCapacityRetries = maxCapacityRetries;
|
updates.maxCapacityRetries = maxCapacityRetries;
|
||||||
}
|
}
|
||||||
|
|||||||
93
tests/stress-test.cjs
Normal file
93
tests/stress-test.cjs
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
/**
 * Stress Test - Send multiple concurrent requests to test rate limit handling
 *
 * Usage: node tests/stress-test.cjs [count] [model]
 * Example: node tests/stress-test.cjs 10 gemini-3-flash
 */

// Proxy endpoint under test; override with ANTHROPIC_BASE_URL.
const BASE_URL = process.env.ANTHROPIC_BASE_URL || 'http://localhost:8080';

// CLI arguments: request count (default 8) and target model.
// Explicit radix 10 so inputs like "010" are never octal-parsed;
// NaN (missing/garbage arg) falls through to the default.
const count = Number.parseInt(process.argv[2], 10) || 8;
const model = process.argv[3] || 'gemini-3-flash';
||||||
|
/**
 * Fire one POST against the proxy's /v1/messages endpoint and log the outcome.
 *
 * @param {number} id - Request identifier, echoed into the prompt and logs.
 * @returns {Promise<{id: number, success: boolean, status?: number, error?: string, elapsed: number}>}
 *          Per-request result record; never rejects (failures are captured).
 */
async function sendRequest(id) {
  const startTime = Date.now();

  // Minimal Anthropic-style message body; the prompt embeds the id so
  // responses can be matched to requests by eye in the logs.
  const payload = {
    model,
    max_tokens: 100,
    messages: [
      { role: 'user', content: `Request ${id}: Say "Hello ${id}" and nothing else.` }
    ]
  };

  try {
    const response = await fetch(`${BASE_URL}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify(payload)
    });

    const elapsed = Date.now() - startTime;

    if (!response.ok) {
      // Non-2xx: surface a truncated error body and record the status code.
      const errorText = await response.text();
      console.log(`[${id}] ❌ ${response.status} after ${elapsed}ms: ${errorText.substring(0, 100)}`);
      return { id, success: false, status: response.status, elapsed };
    }

    const data = await response.json();
    const text = data.content?.[0]?.text?.substring(0, 50) || 'No text';
    console.log(`[${id}] ✅ 200 after ${elapsed}ms: "${text}..."`);
    return { id, success: true, status: 200, elapsed };
  } catch (error) {
    // Network-level failure (connection refused, DNS, abort): no status code.
    const elapsed = Date.now() - startTime;
    console.log(`[${id}] ❌ Error after ${elapsed}ms: ${error.message}`);
    return { id, success: false, error: error.message, elapsed };
  }
}
|
||||||
|
|
||||||
|
/**
 * Launch `count` concurrent requests against the proxy and print a summary:
 * wall-clock total, success/failure counts, mean latency, and (when there are
 * failures) a per-status breakdown.
 */
async function runStressTest() {
  console.log(`\n🚀 Stress Test: Sending ${count} concurrent requests to ${model}\n`);
  console.log(`Target: ${BASE_URL}/v1/messages\n`);
  console.log('─'.repeat(70));

  const startTime = Date.now();

  // Kick off every request at once; sendRequest never rejects, so
  // Promise.all collects one result record per request.
  const results = await Promise.all(
    Array.from({ length: count }, (_, i) => sendRequest(i + 1))
  );

  const totalElapsed = Date.now() - startTime;
  console.log('─'.repeat(70));

  // Single pass over the results instead of separate filter/reduce sweeps.
  let successful = 0;
  let elapsedSum = 0;
  for (const r of results) {
    if (r.success) {
      successful += 1;
    }
    elapsedSum += r.elapsed;
  }
  const failed = results.length - successful;
  const avgElapsed = Math.round(elapsedSum / results.length);

  console.log(`\n📊 Summary:`);
  console.log(`  Total time: ${totalElapsed}ms`);
  console.log(`  Successful: ${successful}/${count}`);
  console.log(`  Failed: ${failed}/${count}`);
  console.log(`  Avg response time: ${avgElapsed}ms`);

  if (failed > 0) {
    // Tally failures by HTTP status; pure network errors have no status
    // and are grouped under 'network'.
    const statusCounts = {};
    for (const r of results) {
      if (r.success) {
        continue;
      }
      const key = r.status || 'network';
      statusCounts[key] = (statusCounts[key] || 0) + 1;
    }
    console.log(`  Error breakdown: ${JSON.stringify(statusCounts)}`);
  }

  console.log('');
}
|
||||||
|
|
||||||
|
// Entry point: run the test and report any unexpected rejection to stderr.
runStressTest().catch((err) => console.error(err));
|
||||||
Reference in New Issue
Block a user