feat: add configurable account selection strategies
Refactor account selection into a strategy pattern with three options: - Sticky: cache-optimized, stays on same account until rate-limited - Round-robin: load-balanced, rotates every request - Hybrid (default): smart distribution using health scores, token buckets, and LRU The hybrid strategy uses multiple signals for optimal account selection: health tracking for reliability, client-side token buckets for rate limiting, and LRU freshness to prefer rested accounts. Includes WebUI settings for strategy selection and unit tests. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* Account Manager
|
||||
* Manages multiple Antigravity accounts with sticky selection,
|
||||
* Manages multiple Antigravity accounts with configurable selection strategies,
|
||||
* automatic failover, and smart cooldown for rate-limited accounts.
|
||||
*/
|
||||
|
||||
@@ -23,13 +23,9 @@ import {
|
||||
clearProjectCache as clearProject,
|
||||
clearTokenCache as clearToken
|
||||
} from './credentials.js';
|
||||
import {
|
||||
pickNext as selectNext,
|
||||
getCurrentStickyAccount as getSticky,
|
||||
shouldWaitForCurrentAccount as shouldWait,
|
||||
pickStickyAccount as selectSticky
|
||||
} from './selection.js';
|
||||
import { createStrategy, getStrategyLabel, DEFAULT_STRATEGY } from './strategies/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { config } from '../config.js';
|
||||
|
||||
export class AccountManager {
|
||||
#accounts = [];
|
||||
@@ -37,19 +33,26 @@ export class AccountManager {
|
||||
#configPath;
|
||||
#settings = {};
|
||||
#initialized = false;
|
||||
#strategy = null;
|
||||
#strategyName = DEFAULT_STRATEGY;
|
||||
|
||||
// Per-account caches
|
||||
#tokenCache = new Map(); // email -> { token, extractedAt }
|
||||
#projectCache = new Map(); // email -> projectId
|
||||
|
||||
constructor(configPath = ACCOUNT_CONFIG_PATH) {
|
||||
constructor(configPath = ACCOUNT_CONFIG_PATH, strategyName = null) {
|
||||
this.#configPath = configPath;
|
||||
// Strategy name can be set at construction or later via initialize
|
||||
if (strategyName) {
|
||||
this.#strategyName = strategyName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the account manager by loading config
|
||||
* @param {string} [strategyOverride] - Override strategy name (from CLI flag or env var)
|
||||
*/
|
||||
async initialize() {
|
||||
async initialize(strategyOverride = null) {
|
||||
if (this.#initialized) return;
|
||||
|
||||
const { accounts, settings, activeIndex } = await loadAccounts(this.#configPath);
|
||||
@@ -66,6 +69,16 @@ export class AccountManager {
|
||||
this.#tokenCache = tokenCache;
|
||||
}
|
||||
|
||||
// Determine strategy: CLI override > env var > config file > default
|
||||
const configStrategy = config?.accountSelection?.strategy;
|
||||
const envStrategy = process.env.ACCOUNT_STRATEGY;
|
||||
this.#strategyName = strategyOverride || envStrategy || configStrategy || this.#strategyName;
|
||||
|
||||
// Create the strategy instance
|
||||
const strategyConfig = config?.accountSelection || {};
|
||||
this.#strategy = createStrategy(this.#strategyName, strategyConfig);
|
||||
logger.info(`[AccountManager] Using ${getStrategyLabel(this.#strategyName)} selection strategy`);
|
||||
|
||||
// Clear any expired rate limits
|
||||
this.clearExpiredLimits();
|
||||
|
||||
@@ -138,51 +151,88 @@ export class AccountManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Pick the next available account (fallback when current is unavailable).
|
||||
* Sets activeIndex to the selected account's index.
|
||||
* @param {string} [modelId] - Optional model ID
|
||||
* @returns {Object|null} The next available account or null if none available
|
||||
*/
|
||||
pickNext(modelId = null) {
|
||||
const { account, newIndex } = selectNext(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
|
||||
this.#currentIndex = newIndex;
|
||||
return account;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current account without advancing the index (sticky selection).
|
||||
* Used for cache continuity - sticks to the same account until rate-limited.
|
||||
* @param {string} [modelId] - Optional model ID
|
||||
* @returns {Object|null} The current account or null if unavailable/rate-limited
|
||||
*/
|
||||
getCurrentStickyAccount(modelId = null) {
|
||||
const { account, newIndex } = getSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
|
||||
this.#currentIndex = newIndex;
|
||||
return account;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we should wait for the current account's rate limit to reset.
|
||||
* Used for sticky account selection - wait if rate limit is short (≤ threshold).
|
||||
* @param {string} [modelId] - Optional model ID
|
||||
* @returns {{shouldWait: boolean, waitMs: number, account: Object|null}}
|
||||
*/
|
||||
shouldWaitForCurrentAccount(modelId = null) {
|
||||
return shouldWait(this.#accounts, this.#currentIndex, modelId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Pick an account with sticky selection preference.
|
||||
* Prefers the current account for cache continuity, only switches when:
|
||||
* - Current account is rate-limited for > 2 minutes
|
||||
* - Current account is invalid
|
||||
* @param {string} [modelId] - Optional model ID
|
||||
* Select an account using the configured strategy.
|
||||
* This is the main method to use for account selection.
|
||||
* @param {string} [modelId] - Model ID for the request
|
||||
* @param {Object} [options] - Additional options
|
||||
* @param {string} [options.sessionId] - Session ID for cache continuity
|
||||
* @returns {{account: Object|null, waitMs: number}} Account to use and optional wait time
|
||||
*/
|
||||
pickStickyAccount(modelId = null) {
|
||||
const { account, waitMs, newIndex } = selectSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId);
|
||||
this.#currentIndex = newIndex;
|
||||
return { account, waitMs };
|
||||
selectAccount(modelId = null, options = {}) {
|
||||
if (!this.#strategy) {
|
||||
throw new Error('AccountManager not initialized. Call initialize() first.');
|
||||
}
|
||||
|
||||
const result = this.#strategy.selectAccount(this.#accounts, modelId, {
|
||||
currentIndex: this.#currentIndex,
|
||||
onSave: () => this.saveToDisk(),
|
||||
...options
|
||||
});
|
||||
|
||||
this.#currentIndex = result.index;
|
||||
return { account: result.account, waitMs: result.waitMs || 0 };
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the strategy of a successful request
|
||||
* @param {Object} account - The account that was used
|
||||
* @param {string} modelId - The model ID that was used
|
||||
*/
|
||||
notifySuccess(account, modelId) {
|
||||
if (this.#strategy) {
|
||||
this.#strategy.onSuccess(account, modelId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the strategy of a rate limit
|
||||
* @param {Object} account - The account that was rate-limited
|
||||
* @param {string} modelId - The model ID that was rate-limited
|
||||
*/
|
||||
notifyRateLimit(account, modelId) {
|
||||
if (this.#strategy) {
|
||||
this.#strategy.onRateLimit(account, modelId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the strategy of a failure
|
||||
* @param {Object} account - The account that failed
|
||||
* @param {string} modelId - The model ID that failed
|
||||
*/
|
||||
notifyFailure(account, modelId) {
|
||||
if (this.#strategy) {
|
||||
this.#strategy.onFailure(account, modelId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current strategy name
|
||||
* @returns {string} Strategy name
|
||||
*/
|
||||
getStrategyName() {
|
||||
return this.#strategyName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the strategy display label
|
||||
* @returns {string} Strategy display label
|
||||
*/
|
||||
getStrategyLabel() {
|
||||
return getStrategyLabel(this.#strategyName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the health tracker from the current strategy (if available)
|
||||
* Used by handlers for consecutive failure tracking
|
||||
* Only available when using hybrid strategy
|
||||
* @returns {Object|null} Health tracker instance or null if not available
|
||||
*/
|
||||
getHealthTracker() {
|
||||
if (this.#strategy && typeof this.#strategy.getHealthTracker === 'function') {
|
||||
return this.#strategy.getHealthTracker();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,201 +0,0 @@
|
||||
/**
|
||||
* Account Selection
|
||||
*
|
||||
* Handles account picking logic (round-robin, sticky) for cache continuity.
|
||||
* All rate limit checks are model-specific.
|
||||
*/
|
||||
|
||||
import { MAX_WAIT_BEFORE_ERROR_MS } from '../constants.js';
|
||||
import { formatDuration } from '../utils/helpers.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { clearExpiredLimits, getAvailableAccounts } from './rate-limits.js';
|
||||
|
||||
/**
|
||||
* Check if an account is usable for a specific model
|
||||
* @param {Object} account - Account object
|
||||
* @param {string} modelId - Model ID to check
|
||||
* @returns {boolean} True if account is usable
|
||||
*/
|
||||
function isAccountUsable(account, modelId) {
    // Missing, invalid, or explicitly disabled (WebUI) accounts are never usable.
    const selectable = Boolean(account) && !account.isInvalid && account.enabled !== false;
    if (!selectable) return false;

    // Rate limits are tracked per model; without a model ID (or without an
    // entry for that model) there is nothing further to check.
    const limit = modelId ? account.modelRateLimits?.[modelId] : undefined;
    if (!limit) return true;

    // Unusable only while the limit is flagged and its reset time is still ahead.
    return !(limit.isRateLimited && limit.resetTime > Date.now());
}
|
||||
|
||||
/**
|
||||
* Pick the next available account (fallback when current is unavailable).
|
||||
*
|
||||
* @param {Array} accounts - Array of account objects
|
||||
* @param {number} currentIndex - Current account index
|
||||
* @param {Function} onSave - Callback to save changes
|
||||
* @param {string} [modelId] - Model ID to check rate limits for
|
||||
* @returns {{account: Object|null, newIndex: number}} The next available account and new index
|
||||
*/
|
||||
export function pickNext(accounts, currentIndex, onSave, modelId = null) {
    clearExpiredLimits(accounts);

    // Nothing available for this model: report no account, keep the index.
    if (getAvailableAccounts(accounts, modelId).length === 0) {
        return { account: null, newIndex: currentIndex };
    }

    const total = accounts.length;
    // An index past the end of the list restarts the scan from position 0.
    const start = currentIndex >= total ? 0 : currentIndex;

    // Walk the ring beginning with the slot AFTER the current one; the
    // current slot itself is the last one examined.
    for (let offset = 1; offset <= total; offset += 1) {
        const candidateIndex = (start + offset) % total;
        const candidate = accounts[candidateIndex];
        if (!isAccountUsable(candidate, modelId)) continue;

        candidate.lastUsed = Date.now();
        logger.info(`[AccountManager] Using account: ${candidate.email} (${candidateIndex + 1}/${total})`);

        // Fire-and-forget: the save callback is not awaited.
        if (onSave) onSave();

        return { account: candidate, newIndex: candidateIndex };
    }

    return { account: null, newIndex: currentIndex };
}
|
||||
|
||||
/**
|
||||
* Get the current account without advancing the index (sticky selection).
|
||||
*
|
||||
* @param {Array} accounts - Array of account objects
|
||||
* @param {number} currentIndex - Current account index
|
||||
* @param {Function} onSave - Callback to save changes
|
||||
* @param {string} [modelId] - Model ID to check rate limits for
|
||||
* @returns {{account: Object|null, newIndex: number}} The current account and index
|
||||
*/
|
||||
export function getCurrentStickyAccount(accounts, currentIndex, onSave, modelId = null) {
    clearExpiredLimits(accounts);

    const total = accounts.length;
    if (total === 0) {
        return { account: null, newIndex: currentIndex };
    }

    // An index past the end of the list falls back to the first account.
    const index = currentIndex >= total ? 0 : currentIndex;
    const current = accounts[index];

    // Sticky lookup never advances: an unusable current account yields null
    // together with the (possibly clamped) index.
    if (!isAccountUsable(current, modelId)) {
        return { account: null, newIndex: index };
    }

    current.lastUsed = Date.now();
    // Fire-and-forget: the save callback is not awaited.
    if (onSave) onSave();
    return { account: current, newIndex: index };
}
|
||||
|
||||
/**
|
||||
* Check if we should wait for the current account's rate limit to reset.
|
||||
*
|
||||
* @param {Array} accounts - Array of account objects
|
||||
* @param {number} currentIndex - Current account index
|
||||
* @param {string} [modelId] - Model ID to check rate limits for
|
||||
* @returns {{shouldWait: boolean, waitMs: number, account: Object|null}}
|
||||
*/
|
||||
export function shouldWaitForCurrentAccount(accounts, currentIndex, modelId = null) {
    const noWait = (account) => ({ shouldWait: false, waitMs: 0, account });

    if (accounts.length === 0) {
        return noWait(null);
    }

    // An index past the end of the list falls back to the first account.
    const index = currentIndex >= accounts.length ? 0 : currentIndex;
    const account = accounts[index];

    // Never recommend waiting on an account that cannot be selected once
    // the wait is over: missing, invalid, or explicitly disabled accounts
    // stay unusable regardless of their rate-limit state.
    if (!account || account.isInvalid || account.enabled === false) {
        return noWait(null);
    }

    // Remaining time on the model-specific limit (0 when there is none).
    const limit = modelId ? account.modelRateLimits?.[modelId] : undefined;
    const waitMs = limit && limit.isRateLimited && limit.resetTime
        ? limit.resetTime - Date.now()
        : 0;

    // Waiting is only worthwhile for a limit that is still pending and
    // within the configured threshold.
    if (waitMs > 0 && waitMs <= MAX_WAIT_BEFORE_ERROR_MS) {
        return { shouldWait: true, waitMs, account };
    }

    return noWait(account);
}
|
||||
|
||||
/**
|
||||
* Pick an account with sticky selection preference.
|
||||
* Prefers the current account for cache continuity.
|
||||
*
|
||||
* @param {Array} accounts - Array of account objects
|
||||
* @param {number} currentIndex - Current account index
|
||||
* @param {Function} onSave - Callback to save changes
|
||||
* @param {string} [modelId] - Model ID to check rate limits for
|
||||
* @returns {{account: Object|null, waitMs: number, newIndex: number}}
|
||||
*/
|
||||
export function pickStickyAccount(accounts, currentIndex, onSave, modelId = null) {
    // 1. Stay on the current account whenever it is usable.
    const sticky = getCurrentStickyAccount(accounts, currentIndex, onSave, modelId);
    if (sticky.account) {
        return { account: sticky.account, waitMs: 0, newIndex: sticky.newIndex };
    }

    // 2. The current account is unusable: fail over immediately if any
    //    other account is free, before even considering a wait.
    if (getAvailableAccounts(accounts, modelId).length > 0) {
        const failover = pickNext(accounts, currentIndex, onSave, modelId);
        if (failover.account) {
            logger.info(`[AccountManager] Switched to new account (failover): ${failover.account.email}`);
            return { account: failover.account, waitMs: 0, newIndex: failover.newIndex };
        }
    }

    // 3. Nothing else is free: wait for the current account if its limit
    //    resets soon enough.
    const { shouldWait, waitMs, account: waitingOn } = shouldWaitForCurrentAccount(accounts, currentIndex, modelId);
    if (shouldWait) {
        logger.info(`[AccountManager] Waiting ${formatDuration(waitMs)} for sticky account: ${waitingOn.email}`);
        return { account: null, waitMs, newIndex: currentIndex };
    }

    // 4. Last resort: one more attempt to move on (may still yield no account).
    const lastResort = pickNext(accounts, currentIndex, onSave, modelId);
    if (lastResort.account) {
        logger.info(`[AccountManager] Switched to new account for cache: ${lastResort.account.email}`);
    }
    return { account: lastResort.account, waitMs: 0, newIndex: lastResort.newIndex };
}
|
||||
104
src/account-manager/strategies/base-strategy.js
Normal file
104
src/account-manager/strategies/base-strategy.js
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Base Strategy
|
||||
*
|
||||
* Abstract base class defining the interface for account selection strategies.
|
||||
* All strategies must implement the selectAccount method.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} SelectionResult
|
||||
* @property {Object|null} account - The selected account or null if none available
|
||||
* @property {number} index - The index of the selected account
|
||||
* @property {number} [waitMs] - Optional wait time before account becomes available
|
||||
*/
|
||||
|
||||
export class BaseStrategy {
    /**
     * Store the strategy configuration. Only subclasses may be constructed.
     * @param {Object} config - Strategy configuration
     * @throws {Error} When BaseStrategy itself is instantiated
     */
    constructor(config = {}) {
        const isDirectInstantiation = new.target === BaseStrategy;
        if (isDirectInstantiation) {
            throw new Error('BaseStrategy is abstract and cannot be instantiated directly');
        }
        this.config = config;
    }

    /**
     * Choose an account for a request. Subclasses must override this.
     * @param {Array} accounts - Array of account objects
     * @param {string} modelId - The model ID for the request
     * @param {Object} options - Additional options
     * @param {number} options.currentIndex - Current account index
     * @param {string} [options.sessionId] - Session ID for cache continuity
     * @param {Function} [options.onSave] - Callback to save changes
     * @returns {SelectionResult} The selected account and index
     * @throws {Error} Always, in this base implementation
     */
    selectAccount(accounts, modelId, options = {}) {
        throw new Error('selectAccount must be implemented by subclass');
    }

    /**
     * Hook invoked after a successful request. No-op by default.
     * @param {Object} account - The account that was used
     * @param {string} modelId - The model ID that was used
     */
    onSuccess(account, modelId) {
        // Intentionally empty; subclasses override when they track successes.
    }

    /**
     * Hook invoked when a request is rate-limited. No-op by default.
     * @param {Object} account - The account that was rate-limited
     * @param {string} modelId - The model ID that was rate-limited
     */
    onRateLimit(account, modelId) {
        // Intentionally empty; subclasses override when they track rate limits.
    }

    /**
     * Hook invoked when a request fails for a non-rate-limit reason. No-op by default.
     * @param {Object} account - The account that failed
     * @param {string} modelId - The model ID that failed
     */
    onFailure(account, modelId) {
        // Intentionally empty; subclasses override when they track failures.
    }

    /**
     * Decide whether an account may serve a request for the given model.
     * @param {Object} account - Account object
     * @param {string} modelId - Model ID to check
     * @returns {boolean} True if account is usable
     */
    isAccountUsable(account, modelId) {
        // Missing, invalid, or explicitly disabled accounts are never usable.
        const selectable = Boolean(account) && !account.isInvalid && account.enabled !== false;
        if (!selectable) return false;

        // Rate limits are per model; no model ID or no entry means no limit to check.
        const limit = modelId ? account.modelRateLimits?.[modelId] : undefined;
        if (!limit) return true;

        // Unusable only while the limit is flagged and its reset time is still ahead.
        return !(limit.isRateLimited && limit.resetTime > Date.now());
    }

    /**
     * Collect every usable account together with its position in `accounts`.
     * @param {Array} accounts - Array of account objects
     * @param {string} modelId - Model ID to check
     * @returns {Array} Array of `{ account, index }` entries for usable accounts
     */
    getUsableAccounts(accounts, modelId) {
        const usable = [];
        accounts.forEach((account, index) => {
            if (this.isAccountUsable(account, modelId)) {
                usable.push({ account, index });
            }
        });
        return usable;
    }
}
|
||||
|
||||
export default BaseStrategy;
|
||||
195
src/account-manager/strategies/hybrid-strategy.js
Normal file
195
src/account-manager/strategies/hybrid-strategy.js
Normal file
@@ -0,0 +1,195 @@
|
||||
/**
|
||||
* Hybrid Strategy
|
||||
*
|
||||
* Smart selection based on health score, token bucket, and LRU freshness.
|
||||
* Combines multiple signals for optimal account distribution.
|
||||
*
|
||||
* Scoring formula:
|
||||
* score = (Health × 2) + ((Tokens / MaxTokens × 100) × 5) + (LRU × 0.1)
|
||||
*
|
||||
* Filters accounts that are:
|
||||
* - Not rate-limited
|
||||
* - Not invalid or disabled
|
||||
* - Health score >= minUsable
|
||||
* - Has tokens available
|
||||
*/
|
||||
|
||||
import { BaseStrategy } from './base-strategy.js';
|
||||
import { HealthTracker, TokenBucketTracker } from './trackers/index.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
|
||||
// Default weights for scoring
|
||||
const DEFAULT_WEIGHTS = {
|
||||
health: 2,
|
||||
tokens: 5,
|
||||
lru: 0.1
|
||||
};
|
||||
|
||||
export class HybridStrategy extends BaseStrategy {
    #healthTracker;
    #tokenBucketTracker;
    #weights;

    /**
     * Build the strategy and its two trackers.
     * @param {Object} config - Strategy configuration
     * @param {Object} [config.healthScore] - Health tracker configuration
     * @param {Object} [config.tokenBucket] - Token bucket configuration
     * @param {Object} [config.weights] - Scoring weights (merged over the defaults)
     */
    constructor(config = {}) {
        super(config);
        const { healthScore, tokenBucket, weights } = config;
        this.#healthTracker = new HealthTracker(healthScore || {});
        this.#tokenBucketTracker = new TokenBucketTracker(tokenBucket || {});
        this.#weights = { ...DEFAULT_WEIGHTS, ...weights };
    }

    /**
     * Pick the highest-scoring account among those that pass every filter.
     *
     * Side effects on a successful pick: stamps `lastUsed` on the account,
     * consumes one token for it, and invokes `options.onSave` if provided.
     *
     * @param {Array} accounts - Array of account objects
     * @param {string} modelId - The model ID for the request
     * @param {Object} options - Additional options
     * @returns {SelectionResult} The selected account and index
     */
    selectAccount(accounts, modelId, options = {}) {
        const { onSave } = options;

        if (accounts.length === 0) {
            return { account: null, index: 0, waitMs: 0 };
        }

        const candidates = this.#getCandidates(accounts, modelId);
        if (candidates.length === 0) {
            logger.debug('[HybridStrategy] No candidates available');
            return { account: null, index: 0, waitMs: 0 };
        }

        // Rank by descending score; the top entry wins.
        const ranked = candidates
            .map((candidate) => ({ ...candidate, score: this.#calculateScore(candidate.account) }))
            .sort((left, right) => right.score - left.score);
        const [winner] = ranked;

        winner.account.lastUsed = Date.now();
        this.#tokenBucketTracker.consume(winner.account.email);

        if (onSave) onSave();

        logger.info(`[HybridStrategy] Using account: ${winner.account.email} (${winner.index + 1}/${accounts.length}, score: ${winner.score.toFixed(1)})`);

        return { account: winner.account, index: winner.index, waitMs: 0 };
    }

    /**
     * Record a successful request against the account's health.
     */
    onSuccess(account, modelId) {
        if (account?.email) {
            this.#healthTracker.recordSuccess(account.email);
        }
    }

    /**
     * Record a rate-limited request against the account's health.
     */
    onRateLimit(account, modelId) {
        if (account?.email) {
            this.#healthTracker.recordRateLimit(account.email);
        }
    }

    /**
     * Record a failed request and hand back the token taken at selection time.
     */
    onFailure(account, modelId) {
        if (account?.email) {
            this.#healthTracker.recordFailure(account.email);
            // The request didn't complete, so its token is refunded.
            this.#tokenBucketTracker.refund(account.email);
        }
    }

    /**
     * Collect `{ account, index }` entries for accounts that are usable,
     * pass the health tracker's check, and still have tokens.
     * @private
     */
    #getCandidates(accounts, modelId) {
        const candidates = [];
        accounts.forEach((account, index) => {
            // Checks run in this order and stop at the first failure.
            const eligible = this.isAccountUsable(account, modelId)
                && this.#healthTracker.isUsable(account.email)
                && this.#tokenBucketTracker.hasTokens(account.email);
            if (eligible) {
                candidates.push({ account, index });
            }
        });
        return candidates;
    }

    /**
     * Combine health, remaining-token ratio, and idle time into one score.
     * @private
     */
    #calculateScore(account) {
        const { email } = account;
        const weights = this.#weights;

        // Health: tracker score scaled by its weight.
        const healthComponent = this.#healthTracker.getScore(email) * weights.health;

        // Tokens: remaining share of the bucket as a percentage, scaled by its weight.
        const tokens = this.#tokenBucketTracker.getTokens(email);
        const maxTokens = this.#tokenBucketTracker.getMaxTokens();
        const tokenComponent = ((tokens / maxTokens) * 100) * weights.tokens;

        // LRU: minutes since last use (capped at one hour), scaled by its
        // weight. An account with no lastUsed counts from time 0.
        const idleMs = Math.min(Date.now() - (account.lastUsed || 0), 3600000);
        const lruComponent = (idleMs / 60000) * weights.lru;

        return healthComponent + tokenComponent + lruComponent;
    }

    /**
     * Expose the health tracker instance.
     * @returns {HealthTracker} The health tracker instance
     */
    getHealthTracker() {
        return this.#healthTracker;
    }

    /**
     * Expose the token bucket tracker instance.
     * @returns {TokenBucketTracker} The token bucket tracker instance
     */
    getTokenBucketTracker() {
        return this.#tokenBucketTracker;
    }
}
|
||||
|
||||
export default HybridStrategy;
|
||||
85
src/account-manager/strategies/index.js
Normal file
85
src/account-manager/strategies/index.js
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* Strategy Factory
|
||||
*
|
||||
* Creates and exports account selection strategy instances.
|
||||
*/
|
||||
|
||||
import { StickyStrategy } from './sticky-strategy.js';
|
||||
import { RoundRobinStrategy } from './round-robin-strategy.js';
|
||||
import { HybridStrategy } from './hybrid-strategy.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import {
|
||||
SELECTION_STRATEGIES,
|
||||
DEFAULT_SELECTION_STRATEGY
|
||||
} from '../../constants.js';
|
||||
|
||||
// Aliases of the shared selection-strategy constants, exported from here
// so callers can get them from the strategies module.
export const STRATEGY_NAMES = SELECTION_STRATEGIES;
export const DEFAULT_STRATEGY = DEFAULT_SELECTION_STRATEGY;

// Human-readable label for each canonical strategy name
export const STRATEGY_LABELS = {
    sticky: 'Sticky (Cache Optimized)',
    'round-robin': 'Round Robin (Load Balanced)',
    hybrid: 'Hybrid (Smart Distribution)'
};
|
||||
|
||||
/**
|
||||
* Create a strategy instance
|
||||
* @param {string} strategyName - Name of the strategy ('sticky', 'round-robin', 'hybrid')
|
||||
* @param {Object} config - Strategy configuration
|
||||
* @returns {BaseStrategy} The strategy instance
|
||||
*/
|
||||
export function createStrategy(strategyName, config = {}) {
    // Lookup table of accepted names (including the 'roundrobin' alias).
    // Built per call so the function has no module-evaluation-order dependency.
    const registry = new Map([
        ['sticky', { Strategy: StickyStrategy, label: 'StickyStrategy' }],
        ['round-robin', { Strategy: RoundRobinStrategy, label: 'RoundRobinStrategy' }],
        ['roundrobin', { Strategy: RoundRobinStrategy, label: 'RoundRobinStrategy' }],
        ['hybrid', { Strategy: HybridStrategy, label: 'HybridStrategy' }]
    ]);
    // String() keeps a non-string name (e.g. a number from a config file)
    // from throwing; it simply ends up in the "unknown" branch below.
    const normalize = (value) => String(value).toLowerCase();

    const requested = registry.get(normalize(strategyName || DEFAULT_STRATEGY));
    if (requested) {
        logger.debug(`[Strategy] Creating ${requested.label}`);
        return new requested.Strategy(config);
    }

    logger.warn(`[Strategy] Unknown strategy "${strategyName}", falling back to ${DEFAULT_STRATEGY}`);

    // Construct the strategy the warning names. HybridStrategy is used only
    // if DEFAULT_STRATEGY itself is not a registered name.
    const fallback = registry.get(normalize(DEFAULT_STRATEGY));
    const FallbackStrategy = fallback ? fallback.Strategy : HybridStrategy;
    return new FallbackStrategy(config);
}
|
||||
|
||||
/**
|
||||
* Check if a strategy name is valid
|
||||
* @param {string} name - Strategy name to check
|
||||
* @returns {boolean} True if valid
|
||||
*/
|
||||
export function isValidStrategy(name) {
    // A validator must answer "no" for anything that is not a non-empty
    // string (numbers, objects, null, ...) instead of throwing on
    // `.toLowerCase()`.
    if (typeof name !== 'string' || name === '') return false;

    const lower = name.toLowerCase();
    // 'roundrobin' is accepted as an alias of 'round-robin'.
    return lower === 'roundrobin' || STRATEGY_NAMES.includes(lower);
}
|
||||
|
||||
/**
|
||||
* Get the display label for a strategy
|
||||
* @param {string} name - Strategy name
|
||||
* @returns {string} Display label
|
||||
*/
|
||||
export function getStrategyLabel(name) {
    // Own-property lookup only: a name such as 'constructor' or 'toString'
    // must not resolve to a member inherited from Object.prototype.
    const ownLabel = (key) => (
        Object.prototype.hasOwnProperty.call(STRATEGY_LABELS, key)
            ? STRATEGY_LABELS[key]
            : undefined
    );

    // String() keeps a non-string name from throwing on toLowerCase().
    const requested = String(name || DEFAULT_STRATEGY).toLowerCase();
    // 'roundrobin' is accepted as an alias of 'round-robin'.
    const canonical = requested === 'roundrobin' ? 'round-robin' : requested;

    // Unknown names get the default strategy's label.
    return ownLabel(canonical) || ownLabel(DEFAULT_STRATEGY);
}
|
||||
|
||||
// Re-export strategies for direct use
|
||||
export { StickyStrategy } from './sticky-strategy.js';
|
||||
export { RoundRobinStrategy } from './round-robin-strategy.js';
|
||||
export { HybridStrategy } from './hybrid-strategy.js';
|
||||
export { BaseStrategy } from './base-strategy.js';
|
||||
|
||||
// Re-export trackers
|
||||
export { HealthTracker, TokenBucketTracker } from './trackers/index.js';
|
||||
76
src/account-manager/strategies/round-robin-strategy.js
Normal file
76
src/account-manager/strategies/round-robin-strategy.js
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Round-Robin Strategy
|
||||
*
|
||||
* Rotates to the next account on every request for maximum throughput.
|
||||
* Does not maintain cache continuity but maximizes concurrent requests.
|
||||
*/
|
||||
|
||||
import { BaseStrategy } from './base-strategy.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
|
||||
export class RoundRobinStrategy extends BaseStrategy {
    // Index of the most recently selected account (0 before any selection)
    #cursor = 0;

    /**
     * Build a round-robin strategy.
     * @param {Object} config - Strategy configuration
     */
    constructor(config = {}) {
        super(config);
    }

    /**
     * Pick the first usable account found after the cursor, wrapping around
     * the list; the cursor's own slot is examined last.
     *
     * Side effects on a successful pick: stamps `lastUsed` on the account,
     * moves the cursor to it, and invokes `options.onSave` if provided.
     *
     * @param {Array} accounts - Array of account objects
     * @param {string} modelId - The model ID for the request
     * @param {Object} options - Additional options
     * @returns {SelectionResult} The selected account and index
     */
    selectAccount(accounts, modelId, options = {}) {
        const { onSave } = options;
        const total = accounts.length;

        if (total === 0) {
            return { account: null, index: 0, waitMs: 0 };
        }

        // A cursor past the end of the list restarts from the first slot.
        if (this.#cursor >= total) {
            this.#cursor = 0;
        }

        for (let step = 1; step <= total; step += 1) {
            const idx = (this.#cursor + step) % total;
            const candidate = accounts[idx];
            if (!this.isAccountUsable(candidate, modelId)) continue;

            candidate.lastUsed = Date.now();
            this.#cursor = idx;

            if (onSave) onSave();

            logger.info(`[RoundRobinStrategy] Using account: ${candidate.email} (${idx + 1}/${total})`);

            return { account: candidate, index: idx, waitMs: 0 };
        }

        // Every account was unusable; the cursor stays where it was.
        return { account: null, index: this.#cursor, waitMs: 0 };
    }

    /**
     * Move the cursor back to position 0.
     */
    resetCursor() {
        this.#cursor = 0;
    }
}
|
||||
|
||||
export default RoundRobinStrategy;
|
||||
138
src/account-manager/strategies/sticky-strategy.js
Normal file
138
src/account-manager/strategies/sticky-strategy.js
Normal file
@@ -0,0 +1,138 @@
|
||||
/**
|
||||
* Sticky Strategy
|
||||
*
|
||||
* Keeps using the same account until it becomes unavailable (rate-limited or invalid).
|
||||
* Best for prompt caching as it maintains cache continuity across requests.
|
||||
*/
|
||||
|
||||
import { BaseStrategy } from './base-strategy.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { formatDuration } from '../../utils/helpers.js';
|
||||
import { MAX_WAIT_BEFORE_ERROR_MS } from '../../constants.js';
|
||||
|
||||
/**
 * Selection strategy that keeps returning the caller's current account while
 * it is usable and only moves to another account when it is not.
 */
export class StickyStrategy extends BaseStrategy {
    /**
     * @param {Object} config - Strategy configuration, forwarded to BaseStrategy
     */
    constructor(config = {}) {
        super(config);
    }

    /**
     * Choose an account, preferring the one at `options.currentIndex`.
     *
     * Order of decisions:
     *  1. The current account is usable: return it.
     *  2. Some other account is usable: fail over to the next usable one.
     *  3. The current account has a model rate limit that resets within
     *     MAX_WAIT_BEFORE_ERROR_MS: return no account plus the wait time.
     *  4. Otherwise make one more scan for any usable account.
     *
     * @param {Array} accounts - Array of account objects
     * @param {string} modelId - The model ID for the request
     * @param {Object} options - Additional options (`currentIndex`, `onSave`)
     * @returns {SelectionResult} `{ account, index, waitMs }`
     */
    selectAccount(accounts, modelId, options = {}) {
        const { currentIndex = 0, onSave } = options;
        const total = accounts.length;

        if (total === 0) {
            return { account: null, index: currentIndex, waitMs: 0 };
        }

        // An index past the end of the list falls back to the first account.
        const stickyIndex = currentIndex >= total ? 0 : currentIndex;
        const stickyAccount = accounts[stickyIndex];

        if (this.isAccountUsable(stickyAccount, modelId)) {
            this.#markUsed(stickyAccount, onSave);
            return { account: stickyAccount, index: stickyIndex, waitMs: 0 };
        }

        // Sticky account is out; switch right away if anything else is free.
        const anyUsable = this.getUsableAccounts(accounts, modelId).length > 0;
        if (anyUsable) {
            const failover = this.#pickNext(accounts, stickyIndex, modelId, onSave);
            if (failover.account) {
                logger.info(`[StickyStrategy] Switched to new account (failover): ${failover.account.email}`);
                return { account: failover.account, index: failover.index, waitMs: 0 };
            }
        }

        // Nothing else is free: a short rate limit is worth waiting out.
        const { shouldWait, waitMs } = this.#shouldWaitForAccount(stickyAccount, modelId);
        if (shouldWait) {
            logger.info(`[StickyStrategy] Waiting ${formatDuration(waitMs)} for sticky account: ${stickyAccount.email}`);
            return { account: null, index: stickyIndex, waitMs };
        }

        // Not worth waiting; take whatever the scan finds (possibly nothing).
        const lastResort = this.#pickNext(accounts, stickyIndex, modelId, onSave);
        return { account: lastResort.account, index: lastResort.index, waitMs: 0 };
    }

    /**
     * Stamp an account as just used and trigger the optional save callback.
     * @private
     */
    #markUsed(account, onSave) {
        account.lastUsed = Date.now();
        if (onSave) onSave();
    }

    /**
     * Scan the list circularly, starting one slot after `currentIndex` and
     * ending on `currentIndex` itself, for the first usable account.
     * @private
     * @returns {{account: Object|null, index: number}} The account found, or
     *   a null account together with the unchanged `currentIndex`
     */
    #pickNext(accounts, currentIndex, modelId, onSave) {
        const total = accounts.length;
        let offset = 1;

        while (offset <= total) {
            const candidateIndex = (currentIndex + offset) % total;
            const candidate = accounts[candidateIndex];

            if (this.isAccountUsable(candidate, modelId)) {
                this.#markUsed(candidate, onSave);
                logger.info(`[StickyStrategy] Using account: ${candidate.email} (${candidateIndex + 1}/${total})`);
                return { account: candidate, index: candidateIndex };
            }

            offset += 1;
        }

        return { account: null, index: currentIndex };
    }

    /**
     * Decide whether to wait for an account's per-model rate limit to reset.
     *
     * Waiting is only suggested for an existing, valid, enabled account whose
     * limit for `modelId` resets in the future and no later than
     * MAX_WAIT_BEFORE_ERROR_MS from now.
     * @private
     * @returns {{shouldWait: boolean, waitMs: number}}
     */
    #shouldWaitForAccount(account, modelId) {
        if (!account || account.isInvalid || account.enabled === false) {
            return { shouldWait: false, waitMs: 0 };
        }

        let remainingMs = 0;

        const modelLimit = modelId && account.modelRateLimits && account.modelRateLimits[modelId];
        if (modelLimit && modelLimit.isRateLimited && modelLimit.resetTime) {
            remainingMs = modelLimit.resetTime - Date.now();
        }

        const worthWaiting = remainingMs > 0 && remainingMs <= MAX_WAIT_BEFORE_ERROR_MS;
        return worthWaiting
            ? { shouldWait: true, waitMs: remainingMs }
            : { shouldWait: false, waitMs: 0 };
    }
}
|
||||
|
||||
export default StickyStrategy;
|
||||
162
src/account-manager/strategies/trackers/health-tracker.js
Normal file
162
src/account-manager/strategies/trackers/health-tracker.js
Normal file
@@ -0,0 +1,162 @@
|
||||
/**
|
||||
* Health Tracker
|
||||
*
|
||||
* Tracks per-account health scores to prioritize healthy accounts.
|
||||
* Scores increase on success and decrease on failures/rate limits.
|
||||
* Passive recovery over time helps accounts recover from temporary issues.
|
||||
*/
|
||||
|
||||
// Default configuration (matches opencode-antigravity-auth)
const DEFAULT_CONFIG = {
    initial: 70,           // Starting score for new accounts
    successReward: 1,      // Points on successful request
    rateLimitPenalty: -10, // Points on rate limit
    failurePenalty: -20,   // Points on other failures
    recoveryPerHour: 2,    // Passive recovery rate
    minUsable: 50,         // Minimum score to be selected
    maxScore: 100          // Maximum score cap
};

/**
 * Keeps an in-memory health score per account email.
 *
 * Scores rise on success, fall on rate limits and failures, and recover
 * passively with elapsed wall-clock time. Accounts that were never recorded
 * report the configured `initial` score.
 */
export class HealthTracker {
    static #MS_PER_HOUR = 60 * 60 * 1000;

    #scores = new Map(); // email -> { score, lastUpdated, consecutiveFailures }
    #config;

    /**
     * Create a new HealthTracker
     * @param {Object} config - Overrides merged over DEFAULT_CONFIG
     */
    constructor(config = {}) {
        this.#config = { ...DEFAULT_CONFIG, ...config };
    }

    /**
     * Get the health score for an account
     * @param {string} email - Account email
     * @returns {number} Stored score plus passive recovery, capped at `maxScore`;
     *   `initial` when the account has no record
     */
    getScore(email) {
        const record = this.#scores.get(email);
        if (!record) {
            return this.#config.initial;
        }

        // Date.now() is wall-clock time and can step backwards (NTP or manual
        // adjustment). Clamp so a backwards step never becomes a penalty.
        const elapsedMs = Math.max(0, Date.now() - record.lastUpdated);
        const recovery = (elapsedMs / HealthTracker.#MS_PER_HOUR) * this.#config.recoveryPerHour;

        return Math.min(this.#config.maxScore, record.score + recovery);
    }

    /**
     * Record a successful request: adds `successReward` (capped at `maxScore`)
     * and resets the consecutive-failure counter.
     * @param {string} email - Account email
     */
    recordSuccess(email) {
        const rewarded = Math.min(
            this.#config.maxScore,
            this.getScore(email) + this.#config.successReward
        );
        this.#scores.set(email, {
            score: rewarded,
            lastUpdated: Date.now(),
            consecutiveFailures: 0
        });
    }

    /**
     * Record a rate limit: applies `rateLimitPenalty` (floored at 0) and
     * increments the consecutive-failure counter.
     * @param {string} email - Account email
     */
    recordRateLimit(email) {
        this.#applyPenalty(email, this.#config.rateLimitPenalty);
    }

    /**
     * Record a failure: applies `failurePenalty` (floored at 0) and
     * increments the consecutive-failure counter.
     * @param {string} email - Account email
     */
    recordFailure(email) {
        this.#applyPenalty(email, this.#config.failurePenalty);
    }

    /**
     * Check if an account is usable based on health score
     * @param {string} email - Account email
     * @returns {boolean} True if the current score is at least `minUsable`
     */
    isUsable(email) {
        return this.getScore(email) >= this.#config.minUsable;
    }

    /**
     * Get the minimum usable score threshold
     * @returns {number} Minimum score for an account to be usable
     */
    getMinUsable() {
        return this.#config.minUsable;
    }

    /**
     * Get the maximum score cap
     * @returns {number} Maximum health score
     */
    getMaxScore() {
        return this.#config.maxScore;
    }

    /**
     * Reset an account to the `initial` score with zero consecutive failures
     * @param {string} email - Account email
     */
    reset(email) {
        this.#scores.set(email, {
            score: this.#config.initial,
            lastUpdated: Date.now(),
            consecutiveFailures: 0
        });
    }

    /**
     * Get the consecutive failure count for an account
     * @param {string} email - Account email
     * @returns {number} Failures and rate limits recorded since the last
     *   success or reset; 0 for an unknown account
     */
    getConsecutiveFailures(email) {
        return this.#scores.get(email)?.consecutiveFailures ?? 0;
    }

    /**
     * Clear all tracked scores
     */
    clear() {
        this.#scores.clear();
    }

    /**
     * Shared bookkeeping for rate limits and failures.
     * @param {string} email - Account email
     * @param {number} penalty - Signed amount added to the score (negative lowers it)
     */
    #applyPenalty(email, penalty) {
        const priorFailures = this.#scores.get(email)?.consecutiveFailures ?? 0;
        const penalized = Math.max(0, this.getScore(email) + penalty);
        this.#scores.set(email, {
            score: penalized,
            lastUpdated: Date.now(),
            consecutiveFailures: priorFailures + 1
        });
    }
}
|
||||
|
||||
export default HealthTracker;
|
||||
8
src/account-manager/strategies/trackers/index.js
Normal file
8
src/account-manager/strategies/trackers/index.js
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* Trackers Index
|
||||
*
|
||||
* Exports all tracker classes for account selection strategies.
|
||||
*/
|
||||
|
||||
export { HealthTracker } from './health-tracker.js';
|
||||
export { TokenBucketTracker } from './token-bucket-tracker.js';
|
||||
121
src/account-manager/strategies/trackers/token-bucket-tracker.js
Normal file
121
src/account-manager/strategies/trackers/token-bucket-tracker.js
Normal file
@@ -0,0 +1,121 @@
|
||||
/**
|
||||
* Token Bucket Tracker
|
||||
*
|
||||
* Client-side rate limiting using the token bucket algorithm.
|
||||
* Each account has a bucket of tokens that regenerate over time.
|
||||
* Requests consume tokens; accounts without tokens are deprioritized.
|
||||
*/
|
||||
|
||||
// Default configuration (matches opencode-antigravity-auth)
const DEFAULT_CONFIG = {
    maxTokens: 50,       // Maximum token capacity
    tokensPerMinute: 6,  // Regeneration rate
    initialTokens: 50    // Starting tokens
};

/**
 * Keeps an in-memory token bucket per account email.
 *
 * Each bucket refills continuously at `tokensPerMinute` up to `maxTokens`.
 * Accounts that were never recorded report `initialTokens`.
 */
export class TokenBucketTracker {
    static #MS_PER_MINUTE = 60 * 1000;

    #buckets = new Map(); // email -> { tokens, lastUpdated }
    #config;

    /**
     * Create a new TokenBucketTracker
     * @param {Object} config - Overrides merged over DEFAULT_CONFIG
     */
    constructor(config = {}) {
        this.#config = { ...DEFAULT_CONFIG, ...config };
    }

    /**
     * Get the current token count for an account
     * @param {string} email - Account email
     * @returns {number} Stored tokens plus regeneration, capped at `maxTokens`
     *   (may be fractional); `initialTokens` when the account has no bucket
     */
    getTokens(email) {
        const bucket = this.#buckets.get(email);
        if (!bucket) {
            return this.#config.initialTokens;
        }

        // Date.now() is wall-clock time and can step backwards. Clamp so a
        // backwards step never drains the bucket or makes it negative.
        const elapsedMs = Math.max(0, Date.now() - bucket.lastUpdated);
        const regenerated = (elapsedMs / TokenBucketTracker.#MS_PER_MINUTE) * this.#config.tokensPerMinute;

        return Math.min(this.#config.maxTokens, bucket.tokens + regenerated);
    }

    /**
     * Check if an account has tokens available
     * @param {string} email - Account email
     * @returns {boolean} True if account has at least 1 token
     */
    hasTokens(email) {
        return this.getTokens(email) >= 1;
    }

    /**
     * Consume a token from an account's bucket
     * @param {string} email - Account email
     * @returns {boolean} True if token was consumed, false if no tokens available
     */
    consume(email) {
        const available = this.getTokens(email);
        if (available < 1) {
            return false;
        }

        this.#store(email, available - 1);
        return true;
    }

    /**
     * Refund a token to an account's bucket (e.g., on request failure before processing)
     * @param {string} email - Account email
     */
    refund(email) {
        this.#store(email, Math.min(this.#config.maxTokens, this.getTokens(email) + 1));
    }

    /**
     * Get the maximum token capacity
     * @returns {number} Maximum tokens per bucket
     */
    getMaxTokens() {
        return this.#config.maxTokens;
    }

    /**
     * Reset the bucket for an account to `initialTokens`
     * @param {string} email - Account email
     */
    reset(email) {
        this.#store(email, this.#config.initialTokens);
    }

    /**
     * Clear all tracked buckets
     */
    clear() {
        this.#buckets.clear();
    }

    /**
     * Write a bucket with the given token count, stamped with the current time.
     * @param {string} email - Account email
     * @param {number} tokens - Token count to store
     */
    #store(email, tokens) {
        this.#buckets.set(email, { tokens, lastUpdated: Date.now() });
    }
}
|
||||
|
||||
export default TokenBucketTracker;
|
||||
@@ -10,6 +10,11 @@ import {
|
||||
MAX_RETRIES,
|
||||
MAX_WAIT_BEFORE_ERROR_MS,
|
||||
DEFAULT_COOLDOWN_MS,
|
||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||
MAX_CONSECUTIVE_FAILURES,
|
||||
EXTENDED_COOLDOWN_MS,
|
||||
CAPACITY_RETRY_DELAY_MS,
|
||||
MAX_CAPACITY_RETRIES,
|
||||
isThinkingModel
|
||||
} from '../constants.js';
|
||||
import { convertGoogleToAnthropic } from '../format/index.js';
|
||||
@@ -21,6 +26,85 @@ import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
||||
import { parseThinkingSSEResponse } from './sse-parser.js';
|
||||
import { getFallbackModel } from '../fallback-config.js';
|
||||
|
||||
/**
 * Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits.
 * Holds, per model ID, the Date.now() value stored by recordRateLimitTimestamp().
 * Entries are removed by clearRateLimitTimestamp() and by the periodic cleanup timer below.
 */
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
|
||||
|
||||
/**
 * Tell whether a rate limit was recorded for this model less than
 * RATE_LIMIT_DEDUP_WINDOW_MS ago, in which case the caller should not retry.
 * @param {string} model - Model ID
 * @returns {boolean} True when a recorded timestamp exists and is still inside the window
 */
function shouldSkipRetryDueToDedup(model) {
    const recordedAt = lastRateLimitTimestamps.get(model);

    if (recordedAt) {
        const ageMs = Date.now() - recordedAt;
        if (ageMs < RATE_LIMIT_DEDUP_WINDOW_MS) {
            logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${ageMs}ms ago), skipping retry`);
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Remember the current time as the latest rate limit seen for a model.
 * @param {string} model - Model ID
 */
function recordRateLimitTimestamp(model) {
    const observedAt = Date.now();
    lastRateLimitTimestamps.set(model, observedAt);
}
|
||||
|
||||
/**
 * Forget the recorded rate limit timestamp for a model, if there is one.
 * Called after a request for that model succeeds.
 * @param {string} model - Model ID
 */
function clearRateLimitTimestamp(model) {
    if (lastRateLimitTimestamps.has(model)) {
        lastRateLimitTimestamps.delete(model);
    }
}
|
||||
|
||||
/**
 * Gap 3: Recognise auth error texts that indicate a permanent failure.
 * Matching is a case-insensitive substring search against a fixed list of markers.
 * @param {string} errorText - Error message from API (null/undefined/empty yields false)
 * @returns {boolean} True if any permanent-failure marker occurs in the text
 */
function isPermanentAuthFailure(errorText) {
    const permanentMarkers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];
    const haystack = (errorText || '').toLowerCase();

    return permanentMarkers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
/**
 * Gap 4: Recognise 429 error texts that point at model capacity rather than user quota.
 * Matching is a case-insensitive substring search against a fixed list of markers.
 * @param {string} errorText - Error message from API (null/undefined/empty yields false)
 * @returns {boolean} True if any capacity marker occurs in the text
 */
function isModelCapacityExhausted(errorText) {
    const capacityMarkers = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];
    const haystack = (errorText || '').toLowerCase();

    return capacityMarkers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
// Periodically clean up stale dedup timestamps (every 60 seconds).
// The timer is unref'd so that merely importing this module does not keep the
// Node.js process alive (tests and one-shot scripts can exit normally).
setInterval(() => {
    const cutoff = Date.now() - 60000; // 1 minute
    for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
        if (timestamp < cutoff) {
            lastRateLimitTimestamps.delete(model);
        }
    }
}, 60000).unref?.();
|
||||
|
||||
/**
|
||||
* Send a non-streaming request to Cloud Code with multi-account support
|
||||
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
|
||||
@@ -83,10 +167,14 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
throw new Error('No accounts available');
|
||||
}
|
||||
|
||||
// Pick sticky account (prefers current for cache continuity)
|
||||
let account = accountManager.getCurrentStickyAccount(model);
|
||||
if (!account) {
|
||||
account = accountManager.pickNext(model);
|
||||
// Select account using configured strategy
|
||||
const { account, waitMs } = accountManager.selectAccount(model);
|
||||
|
||||
// If strategy returns a wait time, sleep and retry
|
||||
if (!account && waitMs > 0) {
|
||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
||||
await sleep(waitMs + 500);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!account) {
|
||||
@@ -101,11 +189,14 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
|
||||
logger.debug(`[CloudCode] Sending request for model: ${model}`);
|
||||
|
||||
// Try each endpoint
|
||||
// Try each endpoint with index-based loop for capacity retry support
|
||||
let lastError = null;
|
||||
let retriedOnce = false; // Track if we've already retried for short rate limit
|
||||
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
|
||||
let endpointIndex = 0;
|
||||
|
||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
||||
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
||||
const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
|
||||
try {
|
||||
const url = isThinking
|
||||
? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
|
||||
@@ -122,16 +213,45 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||
|
||||
if (response.status === 401) {
|
||||
// Auth error - clear caches and retry with fresh token
|
||||
logger.warn('[CloudCode] Auth error, refreshing token...');
|
||||
// Gap 3: Check for permanent auth failures
|
||||
if (isPermanentAuthFailure(errorText)) {
|
||||
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
||||
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
||||
throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
|
||||
}
|
||||
|
||||
// Transient auth error - clear caches and retry with fresh token
|
||||
logger.warn('[CloudCode] Transient auth error, refreshing token...');
|
||||
accountManager.clearTokenCache(account.email);
|
||||
accountManager.clearProjectCache(account.email);
|
||||
endpointIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (response.status === 429) {
|
||||
const resetMs = parseResetTime(response, errorText);
|
||||
|
||||
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
|
||||
if (isModelCapacityExhausted(errorText)) {
|
||||
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||
capacityRetryCount++;
|
||||
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
|
||||
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||
await sleep(waitMs);
|
||||
// Don't increment endpointIndex - retry same endpoint
|
||||
continue;
|
||||
}
|
||||
// Max capacity retries exceeded - treat as quota exhaustion
|
||||
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
||||
}
|
||||
|
||||
// Gap 1: Check deduplication window to prevent thundering herd
|
||||
if (shouldSkipRetryDueToDedup(model)) {
|
||||
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
|
||||
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
|
||||
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
||||
}
|
||||
|
||||
// Decision: wait and retry OR switch account
|
||||
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
||||
// Long-term quota exhaustion (> 10s) - switch to next account
|
||||
@@ -144,31 +264,11 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
|
||||
if (!retriedOnce) {
|
||||
retriedOnce = true;
|
||||
recordRateLimitTimestamp(model); // Gap 1: Record before retry
|
||||
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
||||
await sleep(waitMs);
|
||||
// Retry same endpoint
|
||||
const retryResponse = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (retryResponse.ok) {
|
||||
// Process retry response
|
||||
if (isThinking) {
|
||||
return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model);
|
||||
}
|
||||
const data = await retryResponse.json();
|
||||
logger.debug('[CloudCode] Response received after retry');
|
||||
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
||||
}
|
||||
|
||||
// Retry also failed - parse new reset time
|
||||
const retryErrorText = await retryResponse.text();
|
||||
const retryResetMs = parseResetTime(retryResponse, retryErrorText);
|
||||
logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
|
||||
accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
|
||||
throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
|
||||
// Don't increment endpointIndex - retry same endpoint
|
||||
continue;
|
||||
} else {
|
||||
// Already retried once, mark and switch
|
||||
accountManager.markRateLimited(account.email, waitMs, model);
|
||||
@@ -184,18 +284,26 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
|
||||
await sleep(1000);
|
||||
}
|
||||
endpointIndex++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// For thinking models, parse SSE and accumulate all parts
|
||||
if (isThinking) {
|
||||
return await parseThinkingSSEResponse(response, anthropicRequest.model);
|
||||
const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
|
||||
// Gap 1: Clear timestamp on success
|
||||
clearRateLimitTimestamp(model);
|
||||
accountManager.notifySuccess(account, model);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Non-thinking models use regular JSON
|
||||
const data = await response.json();
|
||||
logger.debug('[CloudCode] Response received');
|
||||
// Gap 1: Clear timestamp on success
|
||||
clearRateLimitTimestamp(model);
|
||||
accountManager.notifySuccess(account, model);
|
||||
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
||||
|
||||
} catch (endpointError) {
|
||||
@@ -204,6 +312,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
}
|
||||
logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
|
||||
lastError = endpointError;
|
||||
endpointIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,7 +328,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
|
||||
} catch (error) {
|
||||
if (isRateLimitError(error)) {
|
||||
// Rate limited - already marked, continue to next account
|
||||
// Rate limited - already marked, notify strategy and continue to next account
|
||||
accountManager.notifyRateLimit(account, model);
|
||||
logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
|
||||
continue;
|
||||
}
|
||||
@@ -230,15 +340,31 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
||||
}
|
||||
// Handle 5xx errors
|
||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
|
||||
accountManager.pickNext(model);
|
||||
accountManager.notifyFailure(account, model);
|
||||
|
||||
// Gap 2: Check consecutive failures for extended cooldown
|
||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||
} else {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isNetworkError(error)) {
|
||||
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
|
||||
accountManager.notifyFailure(account, model);
|
||||
|
||||
// Gap 2: Check consecutive failures for extended cooldown
|
||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||
} else {
|
||||
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
|
||||
}
|
||||
await sleep(1000);
|
||||
accountManager.pickNext(model);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,12 @@ import {
|
||||
MAX_RETRIES,
|
||||
MAX_EMPTY_RESPONSE_RETRIES,
|
||||
MAX_WAIT_BEFORE_ERROR_MS,
|
||||
DEFAULT_COOLDOWN_MS
|
||||
DEFAULT_COOLDOWN_MS,
|
||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||
MAX_CONSECUTIVE_FAILURES,
|
||||
EXTENDED_COOLDOWN_MS,
|
||||
CAPACITY_RETRY_DELAY_MS,
|
||||
MAX_CAPACITY_RETRIES
|
||||
} from '../constants.js';
|
||||
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
|
||||
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
||||
@@ -21,6 +26,83 @@ import { streamSSEResponse } from './sse-streamer.js';
|
||||
import { getFallbackModel } from '../fallback-config.js';
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
 * Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits.
 * Holds, per model ID, the Date.now() value stored by recordRateLimitTimestamp().
 * Entries are removed by clearRateLimitTimestamp() and by the periodic cleanup timer below.
 */
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
|
||||
|
||||
/**
 * Report whether this model had a rate limit recorded within the last
 * RATE_LIMIT_DEDUP_WINDOW_MS milliseconds, meaning a retry should be skipped.
 * @param {string} model - Model ID
 * @returns {boolean} True when a recorded timestamp exists and is still inside the window
 */
function shouldSkipRetryDueToDedup(model) {
    const previousHit = lastRateLimitTimestamps.get(model);
    if (!previousHit) {
        return false;
    }

    const sinceHitMs = Date.now() - previousHit;
    const insideWindow = sinceHitMs < RATE_LIMIT_DEDUP_WINDOW_MS;
    if (insideWindow) {
        logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${sinceHitMs}ms ago), skipping retry`);
    }
    return insideWindow;
}
|
||||
|
||||
/**
 * Store the current time as the most recent rate limit for a model.
 * @param {string} model - Model ID
 */
function recordRateLimitTimestamp(model) {
    const hitAt = Date.now();
    lastRateLimitTimestamps.set(model, hitAt);
}
|
||||
|
||||
/**
 * Drop the stored rate limit timestamp for a model, if any.
 * Called after a request for that model succeeds.
 * @param {string} model - Model ID
 */
function clearRateLimitTimestamp(model) {
    if (lastRateLimitTimestamps.has(model)) {
        lastRateLimitTimestamps.delete(model);
    }
}
|
||||
|
||||
/**
 * Gap 3: Detect auth error texts that signal a permanent failure.
 * The text is lower-cased and checked for each known marker as a substring.
 * @param {string} errorText - Error message from API (null/undefined/empty yields false)
 * @returns {boolean} True if at least one permanent-failure marker is present
 */
function isPermanentAuthFailure(errorText) {
    const normalized = (errorText || '').toLowerCase();
    const markers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];

    for (const marker of markers) {
        if (normalized.includes(marker)) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/**
 * Gap 4: Detect 429 error texts caused by model capacity rather than user quota.
 * The text is lower-cased and checked for each known marker as a substring.
 * @param {string} errorText - Error message from API (null/undefined/empty yields false)
 * @returns {boolean} True if at least one capacity marker is present
 */
function isModelCapacityExhausted(errorText) {
    const normalized = (errorText || '').toLowerCase();
    const markers = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];

    for (const marker of markers) {
        if (normalized.includes(marker)) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
// Periodically clean up stale dedup timestamps (every 60 seconds).
// Entries older than one minute are dropped from lastRateLimitTimestamps.
const dedupCleanupTimer = setInterval(() => {
    const cutoff = Date.now() - 60000; // 1 minute
    for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
        if (timestamp < cutoff) {
            lastRateLimitTimestamps.delete(model);
        }
    }
}, 60000);

// Housekeeping must not keep the process alive on its own: without unref()
// any script or test that merely imports this module would never exit.
dedupCleanupTimer.unref?.();
|
||||
|
||||
/**
|
||||
* Send a streaming request to Cloud Code with multi-account support
|
||||
* Streams events in real-time as they arrive from the server
|
||||
@@ -83,10 +165,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
throw new Error('No accounts available');
|
||||
}
|
||||
|
||||
// Pick sticky account (prefers current for cache continuity)
|
||||
let account = accountManager.getCurrentStickyAccount(model);
|
||||
if (!account) {
|
||||
account = accountManager.pickNext(model);
|
||||
// Select account using configured strategy
|
||||
const { account, waitMs } = accountManager.selectAccount(model);
|
||||
|
||||
// If strategy returns a wait time, sleep and retry
|
||||
if (!account && waitMs > 0) {
|
||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
|
||||
await sleep(waitMs + 500);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!account) {
|
||||
@@ -101,11 +187,14 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
|
||||
logger.debug(`[CloudCode] Starting stream for model: ${model}`);
|
||||
|
||||
// Try each endpoint for streaming
|
||||
// Try each endpoint with index-based loop for capacity retry support
|
||||
let lastError = null;
|
||||
let retriedOnce = false; // Track if we've already retried for short rate limit
|
||||
let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
|
||||
let endpointIndex = 0;
|
||||
|
||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
||||
while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
|
||||
const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
|
||||
try {
|
||||
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
||||
|
||||
@@ -120,15 +209,44 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||
|
||||
if (response.status === 401) {
|
||||
// Auth error - clear caches and retry
|
||||
// Gap 3: Check for permanent auth failures
|
||||
if (isPermanentAuthFailure(errorText)) {
|
||||
logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
|
||||
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
||||
throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
|
||||
}
|
||||
|
||||
// Transient auth error - clear caches and retry
|
||||
accountManager.clearTokenCache(account.email);
|
||||
accountManager.clearProjectCache(account.email);
|
||||
endpointIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (response.status === 429) {
|
||||
const resetMs = parseResetTime(response, errorText);
|
||||
|
||||
// Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
|
||||
if (isModelCapacityExhausted(errorText)) {
|
||||
if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
|
||||
capacityRetryCount++;
|
||||
const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
|
||||
logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
|
||||
await sleep(waitMs);
|
||||
// Don't increment endpointIndex - retry same endpoint
|
||||
continue;
|
||||
}
|
||||
// Max capacity retries exceeded - treat as quota exhaustion
|
||||
logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
|
||||
}
|
||||
|
||||
// Gap 1: Check deduplication window to prevent thundering herd
|
||||
if (shouldSkipRetryDueToDedup(model)) {
|
||||
logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
|
||||
accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
|
||||
throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
|
||||
}
|
||||
|
||||
// Decision: wait and retry OR switch account
|
||||
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
||||
// Long-term quota exhaustion (> 10s) - switch to next account
|
||||
@@ -141,28 +259,11 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
|
||||
if (!retriedOnce) {
|
||||
retriedOnce = true;
|
||||
recordRateLimitTimestamp(model); // Gap 1: Record before retry
|
||||
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
||||
await sleep(waitMs);
|
||||
// Retry same endpoint
|
||||
const retryResponse = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: buildHeaders(token, model, 'text/event-stream'),
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (retryResponse.ok) {
|
||||
// Stream the retry response
|
||||
yield* streamSSEResponse(retryResponse, anthropicRequest.model);
|
||||
logger.debug('[CloudCode] Stream completed after retry');
|
||||
return;
|
||||
}
|
||||
|
||||
// Retry also failed - parse new reset time
|
||||
const retryErrorText = await retryResponse.text();
|
||||
const retryResetMs = parseResetTime(retryResponse, retryErrorText);
|
||||
logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
|
||||
accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
|
||||
throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
|
||||
// Don't increment endpointIndex - retry same endpoint
|
||||
continue;
|
||||
} else {
|
||||
// Already retried once, mark and switch
|
||||
accountManager.markRateLimited(account.email, waitMs, model);
|
||||
@@ -179,6 +280,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
await sleep(1000);
|
||||
}
|
||||
|
||||
endpointIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -189,6 +291,9 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
try {
|
||||
yield* streamSSEResponse(currentResponse, anthropicRequest.model);
|
||||
logger.debug('[CloudCode] Stream completed');
|
||||
// Gap 1: Clear timestamp on success
|
||||
clearRateLimitTimestamp(model);
|
||||
accountManager.notifySuccess(account, model);
|
||||
return;
|
||||
} catch (streamError) {
|
||||
// Only retry on EmptyResponseError
|
||||
@@ -226,8 +331,13 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
throw new Error(`429 RESOURCE_EXHAUSTED during retry: ${retryErrorText}`);
|
||||
}
|
||||
|
||||
// Auth error - clear caches and throw with recognizable message
|
||||
// Auth error - check for permanent failure
|
||||
if (currentResponse.status === 401) {
|
||||
if (isPermanentAuthFailure(retryErrorText)) {
|
||||
logger.error(`[CloudCode] Permanent auth failure during retry for ${account.email}`);
|
||||
accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
|
||||
throw new Error(`AUTH_INVALID_PERMANENT: ${retryErrorText}`);
|
||||
}
|
||||
accountManager.clearTokenCache(account.email);
|
||||
accountManager.clearProjectCache(account.email);
|
||||
throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
|
||||
@@ -261,6 +371,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
}
|
||||
logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
|
||||
lastError = endpointError;
|
||||
endpointIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,7 +387,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
|
||||
} catch (error) {
|
||||
if (isRateLimitError(error)) {
|
||||
// Rate limited - already marked, continue to next account
|
||||
// Rate limited - already marked, notify strategy and continue to next account
|
||||
accountManager.notifyRateLimit(account, model);
|
||||
logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
|
||||
continue;
|
||||
}
|
||||
@@ -287,15 +399,31 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
||||
}
|
||||
// Handle 5xx errors
|
||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
|
||||
accountManager.pickNext(model);
|
||||
accountManager.notifyFailure(account, model);
|
||||
|
||||
// Gap 2: Check consecutive failures for extended cooldown
|
||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||
} else {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isNetworkError(error)) {
|
||||
logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
|
||||
accountManager.notifyFailure(account, model);
|
||||
|
||||
// Gap 2: Check consecutive failures for extended cooldown
|
||||
const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
|
||||
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
|
||||
accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
|
||||
} else {
|
||||
logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
|
||||
}
|
||||
await sleep(1000);
|
||||
accountManager.pickNext(model);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,26 @@ const DEFAULT_CONFIG = {
|
||||
persistTokenCache: false,
|
||||
defaultCooldownMs: 10000, // 10 seconds
|
||||
maxWaitBeforeErrorMs: 120000, // 2 minutes
|
||||
modelMapping: {}
|
||||
modelMapping: {},
|
||||
// Account selection strategy configuration
|
||||
accountSelection: {
|
||||
strategy: 'hybrid', // 'sticky' | 'round-robin' | 'hybrid'
|
||||
// Hybrid strategy tuning (optional - sensible defaults)
|
||||
healthScore: {
|
||||
initial: 70, // Starting score for new accounts
|
||||
successReward: 1, // Points on successful request
|
||||
rateLimitPenalty: -10, // Points on rate limit
|
||||
failurePenalty: -20, // Points on other failures
|
||||
recoveryPerHour: 2, // Passive recovery rate
|
||||
minUsable: 50, // Minimum score to be selected
|
||||
maxScore: 100 // Maximum score cap
|
||||
},
|
||||
tokenBucket: {
|
||||
maxTokens: 50, // Maximum token capacity
|
||||
tokensPerMinute: 6, // Regeneration rate
|
||||
initialTokens: 50 // Starting tokens
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Config locations
|
||||
|
||||
@@ -103,9 +103,24 @@ export const MAX_ACCOUNTS = config?.maxAccounts || 10; // From config or 10
|
||||
// NOTE: every config-backed constant below uses `||`, so a configured value
// of 0 (or any other falsy value) falls back to the built-in default.

// Rate limit wait thresholds
export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs || 120000; // From config or 2 minutes

// Gap 1: Retry deduplication - prevents thundering herd on concurrent rate limits
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs || 5000; // 5 seconds

// Gap 2: Consecutive failure tracking - extended cooldown after repeated failures
export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures || 3;
export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs || 60000; // 1 minute

// Gap 4: Capacity exhaustion - shorter retry for model capacity issues (not quota)
export const CAPACITY_RETRY_DELAY_MS = config?.capacityRetryDelayMs || 2000; // 2 seconds
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries || 3;

// Thinking model constants
export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length

// Account selection strategies
export const SELECTION_STRATEGIES = ['sticky', 'round-robin', 'hybrid'];
export const DEFAULT_SELECTION_STRATEGY = 'hybrid';

// Gemini-specific limits
export const GEMINI_MAX_OUTPUT_TOKENS = 16384;
|
||||
|
||||
@@ -235,6 +250,11 @@ export default {
|
||||
MAX_EMPTY_RESPONSE_RETRIES,
|
||||
MAX_ACCOUNTS,
|
||||
MAX_WAIT_BEFORE_ERROR_MS,
|
||||
RATE_LIMIT_DEDUP_WINDOW_MS,
|
||||
MAX_CONSECUTIVE_FAILURES,
|
||||
EXTENDED_COOLDOWN_MS,
|
||||
CAPACITY_RETRY_DELAY_MS,
|
||||
MAX_CAPACITY_RETRIES,
|
||||
MIN_SIGNATURE_LENGTH,
|
||||
GEMINI_MAX_OUTPUT_TOKENS,
|
||||
GEMINI_SKIP_SIGNATURE,
|
||||
|
||||
@@ -149,6 +149,23 @@ export class EmptyResponseError extends AntigravityError {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Capacity exhausted error - Google's model is at capacity (not user quota).
 * Should retry on same account with shorter delay, not switch accounts immediately.
 * Different from QUOTA_EXHAUSTED which indicates user's daily/hourly limit.
 */
export class CapacityExhaustedError extends AntigravityError {
    /**
     * @param {string} message - Error message
     * @param {number|null} retryAfterMs - Suggested retry delay in ms
     */
    constructor(message = 'Model capacity exhausted', retryAfterMs = null) {
        // NOTE(review): the third argument is presumably the base class's
        // "retryable" flag and the fourth its metadata bag - confirm against
        // the AntigravityError constructor.
        super(message, 'CAPACITY_EXHAUSTED', true, { retryAfterMs });
        this.name = 'CapacityExhaustedError';
        // Also exposed directly on the instance for convenient access.
        this.retryAfterMs = retryAfterMs;
    }
}
|
||||
|
||||
/**
|
||||
* Check if an error is a rate limit error
|
||||
* Works with both custom error classes and legacy string-based errors
|
||||
@@ -188,6 +205,22 @@ export function isEmptyResponseError(error) {
|
||||
error?.name === 'EmptyResponseError';
|
||||
}
|
||||
|
||||
/**
 * Check if an error is a capacity exhausted error (model overload, not user quota).
 * This is different from quota exhaustion - capacity issues are temporary infrastructure
 * limits that should be retried on the SAME account with shorter delays.
 *
 * Matches CapacityExhaustedError instances, and otherwise falls back to a
 * case-insensitive substring search of the error's message. Null/undefined
 * errors and errors without a message yield false instead of throwing.
 *
 * @param {Error} error - Error to check
 * @returns {boolean}
 */
export function isCapacityExhaustedError(error) {
    if (error instanceof CapacityExhaustedError) return true;
    // Optional chaining + coercion: this runs inside catch blocks, where the
    // thrown value is not guaranteed to be an Error with a string message.
    const msg = String(error?.message ?? '').toLowerCase();
    const markers = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];
    return markers.some((marker) => msg.includes(marker));
}
|
||||
|
||||
export default {
|
||||
AntigravityError,
|
||||
RateLimitError,
|
||||
@@ -197,7 +230,9 @@ export default {
|
||||
ApiError,
|
||||
NativeModuleError,
|
||||
EmptyResponseError,
|
||||
CapacityExhaustedError,
|
||||
isRateLimitError,
|
||||
isAuthError,
|
||||
isEmptyResponseError
|
||||
isEmptyResponseError,
|
||||
isCapacityExhaustedError
|
||||
};
|
||||
|
||||
42
src/index.js
42
src/index.js
@@ -3,9 +3,10 @@
|
||||
* Entry point - starts the proxy server
|
||||
*/
|
||||
|
||||
import app from './server.js';
|
||||
import app, { accountManager } from './server.js';
|
||||
import { DEFAULT_PORT } from './constants.js';
|
||||
import { logger } from './utils/logger.js';
|
||||
import { getStrategyLabel, STRATEGY_NAMES, DEFAULT_STRATEGY } from './account-manager/strategies/index.js';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
|
||||
@@ -14,6 +15,21 @@ const args = process.argv.slice(2);
|
||||
const isDebug = args.includes('--debug') || process.env.DEBUG === 'true';
|
||||
const isFallbackEnabled = args.includes('--fallback') || process.env.FALLBACK === 'true';
|
||||
|
||||
// Parse --strategy flag (format: --strategy=sticky or --strategy sticky).
// When the flag appears more than once, the last occurrence wins.
let strategyOverride = null;
for (let i = 0; i < args.length; i++) {
    if (args[i].startsWith('--strategy=')) {
        strategyOverride = args[i].split('=')[1];
    } else if (args[i] === '--strategy' && args[i + 1]) {
        strategyOverride = args[i + 1];
    }
}

// Validate strategy. Matching is case-insensitive, so an accepted value is
// stored in its lower-case form rather than with the user's raw casing.
if (strategyOverride) {
    const normalizedStrategy = strategyOverride.toLowerCase();
    if (STRATEGY_NAMES.includes(normalizedStrategy)) {
        strategyOverride = normalizedStrategy;
    } else {
        logger.warn(`[Startup] Invalid strategy "${strategyOverride}". Valid options: ${STRATEGY_NAMES.join(', ')}. Using default.`);
        strategyOverride = null;
    }
}
|
||||
|
||||
// Initialize logger
|
||||
logger.setDebug(isDebug);
|
||||
|
||||
@@ -45,6 +61,7 @@ const server = app.listen(PORT, () => {
|
||||
|
||||
// Build Control section dynamically
|
||||
let controlSection = '║ Control: ║\n';
|
||||
controlSection += '║ --strategy=<s> Set selection strategy (sticky/hybrid) ║\n';
|
||||
if (!isDebug) {
|
||||
controlSection += '║ --debug Enable debug logging ║\n';
|
||||
}
|
||||
@@ -53,17 +70,18 @@ const server = app.listen(PORT, () => {
|
||||
}
|
||||
controlSection += '║ Ctrl+C Stop server ║';
|
||||
|
||||
// Build status section if any modes are active
|
||||
let statusSection = '';
|
||||
if (isDebug || isFallbackEnabled) {
|
||||
statusSection = '║ ║\n';
|
||||
statusSection += '║ Active Modes: ║\n';
|
||||
if (isDebug) {
|
||||
statusSection += '║ ✓ Debug mode enabled ║\n';
|
||||
}
|
||||
if (isFallbackEnabled) {
|
||||
statusSection += '║ ✓ Model fallback enabled ║\n';
|
||||
}
|
||||
// Get the strategy label (accountManager will be initialized by now)
|
||||
const strategyLabel = accountManager.getStrategyLabel();
|
||||
|
||||
// Build status section - always show strategy, plus any active modes
|
||||
let statusSection = '║ ║\n';
|
||||
statusSection += '║ Active Modes: ║\n';
|
||||
statusSection += `${border} ${align4(`✓ Strategy: ${strategyLabel}`)}${border}\n`;
|
||||
if (isDebug) {
|
||||
statusSection += '║ ✓ Debug mode enabled ║\n';
|
||||
}
|
||||
if (isFallbackEnabled) {
|
||||
statusSection += '║ ✓ Model fallback enabled ║\n';
|
||||
}
|
||||
|
||||
logger.log(`
|
||||
|
||||
@@ -26,13 +26,23 @@ import usageStats from './modules/usage-stats.js';
|
||||
const args = process.argv.slice(2);
|
||||
const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true';
|
||||
|
||||
// Parse --strategy flag (format: --strategy=sticky or --strategy sticky).
// When the flag appears more than once, the last occurrence wins.
// The value is lower-cased so that "--strategy=Sticky" yields the same
// override as "--strategy=sticky".
let STRATEGY_OVERRIDE = null;
for (let i = 0; i < args.length; i++) {
    if (args[i].startsWith('--strategy=')) {
        STRATEGY_OVERRIDE = args[i].split('=')[1].toLowerCase();
    } else if (args[i] === '--strategy' && args[i + 1]) {
        STRATEGY_OVERRIDE = args[i + 1].toLowerCase();
    }
}
|
||||
|
||||
const app = express();
|
||||
|
||||
// Disable x-powered-by header for security
|
||||
app.disable('x-powered-by');
|
||||
|
||||
// Initialize account manager (will be fully initialized on first request or startup)
|
||||
const accountManager = new AccountManager();
|
||||
export const accountManager = new AccountManager();
|
||||
|
||||
// Track initialization status
|
||||
let isInitialized = false;
|
||||
@@ -50,7 +60,7 @@ async function ensureInitialized() {
|
||||
|
||||
initPromise = (async () => {
|
||||
try {
|
||||
await accountManager.initialize();
|
||||
await accountManager.initialize(STRATEGY_OVERRIDE);
|
||||
isInitialized = true;
|
||||
const status = accountManager.getStatus();
|
||||
logger.success(`[Server] Account pool initialized: ${status.summary}`);
|
||||
|
||||
@@ -282,7 +282,7 @@ export function mountWebUI(app, dirname, accountManager) {
|
||||
*/
|
||||
app.post('/api/config', (req, res) => {
|
||||
try {
|
||||
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs } = req.body;
|
||||
const { debug, logLevel, maxRetries, retryBaseMs, retryMaxMs, persistTokenCache, defaultCooldownMs, maxWaitBeforeErrorMs, accountSelection } = req.body;
|
||||
|
||||
// Only allow updating specific fields (security)
|
||||
const updates = {};
|
||||
@@ -308,6 +308,16 @@ export function mountWebUI(app, dirname, accountManager) {
|
||||
if (typeof maxWaitBeforeErrorMs === 'number' && maxWaitBeforeErrorMs >= 0 && maxWaitBeforeErrorMs <= 600000) {
|
||||
updates.maxWaitBeforeErrorMs = maxWaitBeforeErrorMs;
|
||||
}
|
||||
// Account selection strategy validation
|
||||
if (accountSelection && typeof accountSelection === 'object') {
|
||||
const validStrategies = ['sticky', 'round-robin', 'hybrid'];
|
||||
if (accountSelection.strategy && validStrategies.includes(accountSelection.strategy)) {
|
||||
updates.accountSelection = {
|
||||
...(config.accountSelection || {}),
|
||||
strategy: accountSelection.strategy
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(updates).length === 0) {
|
||||
return res.status(400).json({
|
||||
|
||||
Reference in New Issue
Block a user