From 9c4a712a9afbed5995c12afd6f7d299076232f4a Mon Sep 17 00:00:00 2001 From: Badri Narayanan S <59133612+badri-s2001@users.noreply.github.com> Date: Sat, 3 Jan 2026 15:33:49 +0530 Subject: [PATCH] Selective fixes from PR #35: Model-specific rate limits & robustness improvements (#37) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: apply local user changes and fixes * ;D * Implement OpenAI support, model-specific rate limiting, and robustness fixes * docs: update pr title * feat: ensure unique openai models endpoint * fix: startup banner alignment and removed duplicates * feat: add model fallback system with --fallback flag * fix: accounts cli hanging after completion * feat: add exit option to accounts cli menu * fix: remove circular dependency warning for fallback flag * feat: show active modes in banner and hide their flags * Remove OpenAI compatibility and fallback features from PR #35 Cherry-picked selective fixes from PR #35 while removing: - OpenAI-compatible API endpoints (/openai/v1/*) - Model fallback system (fallback-config.js) - Thinking block skip for Gemini models - Unnecessary files (pullrequest.md, test-fix.js, test-openai.js) Retained improvements: - Network error handling with retry logic - Model-specific rate limiting - Enhanced health check with quota info - CLI fixes (exit option, process.exit) - Startup banner alignment (debug mode only) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * banner alignment fix * Refactor: Model-specific rate limits and cleanup deprecated code - Remove global rate limit fields (isRateLimited, rateLimitResetTime) in favor of model-specific limits (modelRateLimits[modelId]) - Remove deprecated wrapper functions (is429Error, isAuthInvalidError) from handlers - Filter fetchAvailableModels to only return Claude and Gemini models - Fix getCurrentStickyAccount() to pass model param after waiting - Update /account-limits endpoint to 
show model-specific limits - Remove multi-account OAuth flow to avoid state mismatch errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * fix: show (x/y) limited status in account-limits table - Status is now "ok" only when all models are available - Shows "(x/y) limited" when x out of y models are exhausted - Provides better visibility into partial rate limiting 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * docs: update CLAUDE.md with model-specific rate limiting - Document modelRateLimits[modelId] for per-model rate tracking - Add isNetworkError() helper to utilities section 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --------- Co-authored-by: M1noa Co-authored-by: Minoa Co-authored-by: Claude --- CLAUDE.md | 4 +- src/account-manager/credentials.js | 8 ++ src/account-manager/index.js | 52 +++++---- src/account-manager/rate-limits.js | 95 ++++++++++++----- src/account-manager/selection.js | 65 ++++++++---- src/account-manager/storage.js | 13 +-- src/cli/accounts.js | 52 ++++----- src/cloudcode/message-handler.js | 47 ++++----- src/cloudcode/model-api.js | 19 +++- src/cloudcode/streaming-handler.js | 46 ++++---- src/format/content-converter.js | 2 + src/format/request-converter.js | 10 ++ src/index.js | 44 +++++--- src/server.js | 164 ++++++++++++++++++++++++----- src/utils/helpers.js | 47 +++++++++ 15 files changed, 474 insertions(+), 194 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index ceee3cd..443c3ff 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -106,7 +106,8 @@ src/ **Multi-Account Load Balancing:** - Sticky account selection for prompt caching (stays on same account across turns) -- Automatic switch only when rate-limited for > 2 minutes +- Model-specific rate limiting via `account.modelRateLimits[modelId]` +- Automatic switch only when rate-limited for > 2 minutes on the current model - Session ID derived from first user 
message hash for cache continuity - Account state persisted to `~/.config/antigravity-proxy/accounts.json` @@ -147,6 +148,7 @@ src/ **Utilities:** Shared helpers in `src/utils/helpers.js`: - `formatDuration(ms)` - Format milliseconds as "1h23m45s" - `sleep(ms)` - Promise-based delay +- `isNetworkError(error)` - Check if error is a transient network error **Logger:** Structured logging via `src/utils/logger.js`: - `logger.info(msg)` - Standard info (blue) diff --git a/src/account-manager/credentials.js b/src/account-manager/credentials.js index 7362c70..f5e3489 100644 --- a/src/account-manager/credentials.js +++ b/src/account-manager/credentials.js @@ -14,6 +14,7 @@ import { import { refreshAccessToken } from '../auth/oauth.js'; import { getAuthStatus } from '../auth/database.js'; import { logger } from '../utils/logger.js'; +import { isNetworkError } from '../utils/helpers.js'; /** * Get OAuth token for an account @@ -48,6 +49,13 @@ export async function getTokenForAccount(account, tokenCache, onInvalid, onSave) } logger.success(`[AccountManager] Refreshed OAuth token for: ${account.email}`); } catch (error) { + // Check if it's a transient network error + if (isNetworkError(error)) { + logger.warn(`[AccountManager] Failed to refresh token for ${account.email} due to network error: ${error.message}`); + // Do NOT mark as invalid, just throw so caller knows it failed + throw new Error(`AUTH_NETWORK_ERROR: ${error.message}`); + } + logger.error(`[AccountManager] Failed to refresh token for ${account.email}:`, error.message); // Mark account as invalid (credentials need re-auth) if (onInvalid) onInvalid(account.email, error.message); diff --git a/src/account-manager/index.js b/src/account-manager/index.js index 6cc1023..b730c1c 100644 --- a/src/account-manager/index.js +++ b/src/account-manager/index.js @@ -81,18 +81,20 @@ export class AccountManager { /** * Check if all accounts are rate-limited + * @param {string} [modelId] - Optional model ID * @returns {boolean} 
True if all accounts are rate-limited */ - isAllRateLimited() { - return checkAllRateLimited(this.#accounts); + isAllRateLimited(modelId = null) { + return checkAllRateLimited(this.#accounts, modelId); } /** * Get list of available (non-rate-limited, non-invalid) accounts + * @param {string} [modelId] - Optional model ID * @returns {Array} Array of available account objects */ - getAvailableAccounts() { - return getAvailable(this.#accounts); + getAvailableAccounts(modelId = null) { + return getAvailable(this.#accounts, modelId); } /** @@ -127,10 +129,11 @@ export class AccountManager { /** * Pick the next available account (fallback when current is unavailable). * Sets activeIndex to the selected account's index. + * @param {string} [modelId] - Optional model ID * @returns {Object|null} The next available account or null if none available */ - pickNext() { - const { account, newIndex } = selectNext(this.#accounts, this.#currentIndex, () => this.saveToDisk()); + pickNext(modelId = null) { + const { account, newIndex } = selectNext(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId); this.#currentIndex = newIndex; return account; } @@ -138,10 +141,11 @@ export class AccountManager { /** * Get the current account without advancing the index (sticky selection). * Used for cache continuity - sticks to the same account until rate-limited. + * @param {string} [modelId] - Optional model ID * @returns {Object|null} The current account or null if unavailable/rate-limited */ - getCurrentStickyAccount() { - const { account, newIndex } = getSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk()); + getCurrentStickyAccount(modelId = null) { + const { account, newIndex } = getSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId); this.#currentIndex = newIndex; return account; } @@ -149,10 +153,11 @@ export class AccountManager { /** * Check if we should wait for the current account's rate limit to reset. 
* Used for sticky account selection - wait if rate limit is short (≤ threshold). + * @param {string} [modelId] - Optional model ID * @returns {{shouldWait: boolean, waitMs: number, account: Object|null}} */ - shouldWaitForCurrentAccount() { - return shouldWait(this.#accounts, this.#currentIndex); + shouldWaitForCurrentAccount(modelId = null) { + return shouldWait(this.#accounts, this.#currentIndex, modelId); } /** @@ -160,10 +165,11 @@ export class AccountManager { * Prefers the current account for cache continuity, only switches when: * - Current account is rate-limited for > 2 minutes * - Current account is invalid + * @param {string} [modelId] - Optional model ID * @returns {{account: Object|null, waitMs: number}} Account to use and optional wait time */ - pickStickyAccount() { - const { account, waitMs, newIndex } = selectSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk()); + pickStickyAccount(modelId = null) { + const { account, waitMs, newIndex } = selectSticky(this.#accounts, this.#currentIndex, () => this.saveToDisk(), modelId); this.#currentIndex = newIndex; return { account, waitMs }; } @@ -172,9 +178,10 @@ export class AccountManager { * Mark an account as rate-limited * @param {string} email - Email of the account to mark * @param {number|null} resetMs - Time in ms until rate limit resets (optional) + * @param {string} [modelId] - Optional model ID to mark specific limit */ - markRateLimited(email, resetMs = null) { - markLimited(this.#accounts, email, resetMs, this.#settings); + markRateLimited(email, resetMs = null, modelId = null) { + markLimited(this.#accounts, email, resetMs, this.#settings, modelId); this.saveToDisk(); } @@ -190,10 +197,11 @@ export class AccountManager { /** * Get the minimum wait time until any account becomes available + * @param {string} [modelId] - Optional model ID * @returns {number} Wait time in milliseconds */ - getMinWaitTimeMs() { - return getMinWait(this.#accounts); + getMinWaitTimeMs(modelId = null) { 
+ return getMinWait(this.#accounts, modelId); } /** @@ -251,9 +259,16 @@ export class AccountManager { */ getStatus() { const available = this.getAvailableAccounts(); - const rateLimited = this.#accounts.filter(a => a.isRateLimited); const invalid = this.getInvalidAccounts(); + // Count accounts that have any active model-specific rate limits + const rateLimited = this.#accounts.filter(a => { + if (!a.modelRateLimits) return false; + return Object.values(a.modelRateLimits).some( + limit => limit.isRateLimited && limit.resetTime > Date.now() + ); + }); + return { total: this.#accounts.length, available: available.length, @@ -263,8 +278,7 @@ export class AccountManager { accounts: this.#accounts.map(a => ({ email: a.email, source: a.source, - isRateLimited: a.isRateLimited, - rateLimitResetTime: a.rateLimitResetTime, + modelRateLimits: a.modelRateLimits || {}, isInvalid: a.isInvalid || false, invalidReason: a.invalidReason || null, lastUsed: a.lastUsed diff --git a/src/account-manager/rate-limits.js b/src/account-manager/rate-limits.js index 1222672..fe756fa 100644 --- a/src/account-manager/rate-limits.js +++ b/src/account-manager/rate-limits.js @@ -2,6 +2,7 @@ * Rate Limit Management * * Handles rate limit tracking and state management for accounts. + * All rate limits are model-specific. 
*/ import { DEFAULT_COOLDOWN_MS } from '../constants.js'; @@ -9,24 +10,44 @@ import { formatDuration } from '../utils/helpers.js'; import { logger } from '../utils/logger.js'; /** - * Check if all accounts are rate-limited + * Check if all accounts are rate-limited for a specific model * * @param {Array} accounts - Array of account objects + * @param {string} modelId - Model ID to check rate limits for * @returns {boolean} True if all accounts are rate-limited */ -export function isAllRateLimited(accounts) { +export function isAllRateLimited(accounts, modelId) { if (accounts.length === 0) return true; - return accounts.every(acc => acc.isRateLimited); + if (!modelId) return false; // No model specified = not rate limited + + return accounts.every(acc => { + if (acc.isInvalid) return true; // Invalid accounts count as unavailable + const modelLimits = acc.modelRateLimits || {}; + const limit = modelLimits[modelId]; + return limit && limit.isRateLimited && limit.resetTime > Date.now(); + }); } /** - * Get list of available (non-rate-limited, non-invalid) accounts + * Get list of available (non-rate-limited, non-invalid) accounts for a model * * @param {Array} accounts - Array of account objects + * @param {string} [modelId] - Model ID to filter by * @returns {Array} Array of available account objects */ -export function getAvailableAccounts(accounts) { - return accounts.filter(acc => !acc.isRateLimited && !acc.isInvalid); +export function getAvailableAccounts(accounts, modelId = null) { + return accounts.filter(acc => { + if (acc.isInvalid) return false; + + if (modelId && acc.modelRateLimits && acc.modelRateLimits[modelId]) { + const limit = acc.modelRateLimits[modelId]; + if (limit.isRateLimited && limit.resetTime > Date.now()) { + return false; + } + } + + return true; + }); } /** @@ -50,11 +71,15 @@ export function clearExpiredLimits(accounts) { let cleared = 0; for (const account of accounts) { - if (account.isRateLimited && account.rateLimitResetTime && 
account.rateLimitResetTime <= now) { - account.isRateLimited = false; - account.rateLimitResetTime = null; - cleared++; - logger.success(`[AccountManager] Rate limit expired for: ${account.email}`); + if (account.modelRateLimits) { + for (const [modelId, limit] of Object.entries(account.modelRateLimits)) { + if (limit.isRateLimited && limit.resetTime <= now) { + limit.isRateLimited = false; + limit.resetTime = null; + cleared++; + logger.success(`[AccountManager] Rate limit expired for: ${account.email} (model: ${modelId})`); + } + } } } @@ -68,31 +93,43 @@ export function clearExpiredLimits(accounts) { */ export function resetAllRateLimits(accounts) { for (const account of accounts) { - account.isRateLimited = false; - account.rateLimitResetTime = null; + if (account.modelRateLimits) { + for (const key of Object.keys(account.modelRateLimits)) { + account.modelRateLimits[key] = { isRateLimited: false, resetTime: null }; + } + } } logger.warn('[AccountManager] Reset all rate limits for optimistic retry'); } /** - * Mark an account as rate-limited + * Mark an account as rate-limited for a specific model * * @param {Array} accounts - Array of account objects * @param {string} email - Email of the account to mark - * @param {number|null} resetMs - Time in ms until rate limit resets (optional) + * @param {number|null} resetMs - Time in ms until rate limit resets * @param {Object} settings - Settings object with cooldownDurationMs + * @param {string} modelId - Model ID to mark rate limit for * @returns {boolean} True if account was found and marked */ -export function markRateLimited(accounts, email, resetMs = null, settings = {}) { +export function markRateLimited(accounts, email, resetMs = null, settings = {}, modelId) { const account = accounts.find(a => a.email === email); if (!account) return false; - account.isRateLimited = true; const cooldownMs = resetMs || settings.cooldownDurationMs || DEFAULT_COOLDOWN_MS; - account.rateLimitResetTime = Date.now() + cooldownMs; 
+ const resetTime = Date.now() + cooldownMs; + + if (!account.modelRateLimits) { + account.modelRateLimits = {}; + } + + account.modelRateLimits[modelId] = { + isRateLimited: true, + resetTime: resetTime + }; logger.warn( - `[AccountManager] Rate limited: ${email}. Available in ${formatDuration(cooldownMs)}` + `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(cooldownMs)}` ); return true; @@ -128,24 +165,28 @@ export function markInvalid(accounts, email, reason = 'Unknown error') { } /** - * Get the minimum wait time until any account becomes available + * Get the minimum wait time until any account becomes available for a model * * @param {Array} accounts - Array of account objects + * @param {string} modelId - Model ID to check * @returns {number} Wait time in milliseconds */ -export function getMinWaitTimeMs(accounts) { - if (!isAllRateLimited(accounts)) return 0; +export function getMinWaitTimeMs(accounts, modelId) { + if (!isAllRateLimited(accounts, modelId)) return 0; const now = Date.now(); let minWait = Infinity; let soonestAccount = null; for (const account of accounts) { - if (account.rateLimitResetTime) { - const wait = account.rateLimitResetTime - now; - if (wait > 0 && wait < minWait) { - minWait = wait; - soonestAccount = account; + if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) { + const limit = account.modelRateLimits[modelId]; + if (limit.isRateLimited && limit.resetTime) { + const wait = limit.resetTime - now; + if (wait > 0 && wait < minWait) { + minWait = wait; + soonestAccount = account; + } } } } diff --git a/src/account-manager/selection.js b/src/account-manager/selection.js index de5c480..ef4d8bd 100644 --- a/src/account-manager/selection.js +++ b/src/account-manager/selection.js @@ -2,6 +2,7 @@ * Account Selection * * Handles account picking logic (round-robin, sticky) for cache continuity. + * All rate limit checks are model-specific. 
*/ import { MAX_WAIT_BEFORE_ERROR_MS } from '../constants.js'; @@ -9,18 +10,38 @@ import { formatDuration } from '../utils/helpers.js'; import { logger } from '../utils/logger.js'; import { clearExpiredLimits, getAvailableAccounts } from './rate-limits.js'; +/** + * Check if an account is usable for a specific model + * @param {Object} account - Account object + * @param {string} modelId - Model ID to check + * @returns {boolean} True if account is usable + */ +function isAccountUsable(account, modelId) { + if (!account || account.isInvalid) return false; + + if (modelId && account.modelRateLimits && account.modelRateLimits[modelId]) { + const limit = account.modelRateLimits[modelId]; + if (limit.isRateLimited && limit.resetTime > Date.now()) { + return false; + } + } + + return true; +} + /** * Pick the next available account (fallback when current is unavailable). * * @param {Array} accounts - Array of account objects * @param {number} currentIndex - Current account index * @param {Function} onSave - Callback to save changes + * @param {string} [modelId] - Model ID to check rate limits for * @returns {{account: Object|null, newIndex: number}} The next available account and new index */ -export function pickNext(accounts, currentIndex, onSave) { +export function pickNext(accounts, currentIndex, onSave, modelId = null) { clearExpiredLimits(accounts); - const available = getAvailableAccounts(accounts); + const available = getAvailableAccounts(accounts, modelId); if (available.length === 0) { return { account: null, newIndex: currentIndex }; } @@ -36,7 +57,7 @@ export function pickNext(accounts, currentIndex, onSave) { const idx = (index + i) % accounts.length; const account = accounts[idx]; - if (!account.isRateLimited && !account.isInvalid) { + if (isAccountUsable(account, modelId)) { account.lastUsed = Date.now(); const position = idx + 1; @@ -59,9 +80,10 @@ export function pickNext(accounts, currentIndex, onSave) { * @param {Array} accounts - Array of account 
objects * @param {number} currentIndex - Current account index * @param {Function} onSave - Callback to save changes + * @param {string} [modelId] - Model ID to check rate limits for * @returns {{account: Object|null, newIndex: number}} The current account and index */ -export function getCurrentStickyAccount(accounts, currentIndex, onSave) { +export function getCurrentStickyAccount(accounts, currentIndex, onSave, modelId = null) { clearExpiredLimits(accounts); if (accounts.length === 0) { @@ -77,8 +99,7 @@ export function getCurrentStickyAccount(accounts, currentIndex, onSave) { // Get current account directly (activeIndex = current account) const account = accounts[index]; - // Return if available - if (account && !account.isRateLimited && !account.isInvalid) { + if (isAccountUsable(account, modelId)) { account.lastUsed = Date.now(); // Trigger save (don't await to avoid blocking) if (onSave) onSave(); @@ -93,9 +114,10 @@ export function getCurrentStickyAccount(accounts, currentIndex, onSave) { * * @param {Array} accounts - Array of account objects * @param {number} currentIndex - Current account index + * @param {string} [modelId] - Model ID to check rate limits for * @returns {{shouldWait: boolean, waitMs: number, account: Object|null}} */ -export function shouldWaitForCurrentAccount(accounts, currentIndex) { +export function shouldWaitForCurrentAccount(accounts, currentIndex, modelId = null) { if (accounts.length === 0) { return { shouldWait: false, waitMs: 0, account: null }; } @@ -113,15 +135,21 @@ export function shouldWaitForCurrentAccount(accounts, currentIndex) { return { shouldWait: false, waitMs: 0, account: null }; } - if (account.isRateLimited && account.rateLimitResetTime) { - const waitMs = account.rateLimitResetTime - Date.now(); + let waitMs = 0; - // If wait time is within threshold, recommend waiting - if (waitMs > 0 && waitMs <= MAX_WAIT_BEFORE_ERROR_MS) { - return { shouldWait: true, waitMs, account }; + // Check model-specific limit + if 
(modelId && account.modelRateLimits && account.modelRateLimits[modelId]) { + const limit = account.modelRateLimits[modelId]; + if (limit.isRateLimited && limit.resetTime) { + waitMs = limit.resetTime - Date.now(); } } + // If wait time is within threshold, recommend waiting + if (waitMs > 0 && waitMs <= MAX_WAIT_BEFORE_ERROR_MS) { + return { shouldWait: true, waitMs, account }; + } + return { shouldWait: false, waitMs: 0, account }; } @@ -132,21 +160,22 @@ export function shouldWaitForCurrentAccount(accounts, currentIndex) { * @param {Array} accounts - Array of account objects * @param {number} currentIndex - Current account index * @param {Function} onSave - Callback to save changes + * @param {string} [modelId] - Model ID to check rate limits for * @returns {{account: Object|null, waitMs: number, newIndex: number}} */ -export function pickStickyAccount(accounts, currentIndex, onSave) { +export function pickStickyAccount(accounts, currentIndex, onSave, modelId = null) { // First try to get the current sticky account - const { account: stickyAccount, newIndex: stickyIndex } = getCurrentStickyAccount(accounts, currentIndex, onSave); + const { account: stickyAccount, newIndex: stickyIndex } = getCurrentStickyAccount(accounts, currentIndex, onSave, modelId); if (stickyAccount) { return { account: stickyAccount, waitMs: 0, newIndex: stickyIndex }; } // Current account is rate-limited or invalid. // CHECK IF OTHERS ARE AVAILABLE before deciding to wait. - const available = getAvailableAccounts(accounts); + const available = getAvailableAccounts(accounts, modelId); if (available.length > 0) { // Found a free account! Switch immediately. 
- const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave); + const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave, modelId); if (nextAccount) { logger.info(`[AccountManager] Switched to new account (failover): ${nextAccount.email}`); return { account: nextAccount, waitMs: 0, newIndex }; @@ -154,14 +183,14 @@ export function pickStickyAccount(accounts, currentIndex, onSave) { } // No other accounts available. Now checking if we should wait for current account. - const waitInfo = shouldWaitForCurrentAccount(accounts, currentIndex); + const waitInfo = shouldWaitForCurrentAccount(accounts, currentIndex, modelId); if (waitInfo.shouldWait) { logger.info(`[AccountManager] Waiting ${formatDuration(waitInfo.waitMs)} for sticky account: ${waitInfo.account.email}`); return { account: null, waitMs: waitInfo.waitMs, newIndex: currentIndex }; } // Current account unavailable for too long/invalid, and no others available? - const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave); + const { account: nextAccount, newIndex } = pickNext(accounts, currentIndex, onSave, modelId); if (nextAccount) { logger.info(`[AccountManager] Switched to new account for cache: ${nextAccount.email}`); } diff --git a/src/account-manager/storage.js b/src/account-manager/storage.js index 30bb6fe..b8ee6b8 100644 --- a/src/account-manager/storage.js +++ b/src/account-manager/storage.js @@ -26,12 +26,11 @@ export async function loadAccounts(configPath = ACCOUNT_CONFIG_PATH) { const accounts = (config.accounts || []).map(acc => ({ ...acc, - isRateLimited: acc.isRateLimited || false, - rateLimitResetTime: acc.rateLimitResetTime || null, lastUsed: acc.lastUsed || null, // Reset invalid flag on startup - give accounts a fresh chance to refresh isInvalid: false, - invalidReason: null + invalidReason: null, + modelRateLimits: acc.modelRateLimits || {} })); const settings = config.settings || {}; @@ -69,9 +68,8 @@ export function 
loadDefaultAccount(dbPath) { const account = { email: authData.email || 'default@antigravity', source: 'database', - isRateLimited: false, - rateLimitResetTime: null, - lastUsed: null + lastUsed: null, + modelRateLimits: {} }; const tokenCache = new Map(); @@ -114,10 +112,9 @@ export async function saveAccounts(configPath, accounts, settings, activeIndex) apiKey: acc.source === 'manual' ? acc.apiKey : undefined, projectId: acc.projectId || undefined, addedAt: acc.addedAt || undefined, - isRateLimited: acc.isRateLimited, - rateLimitResetTime: acc.rateLimitResetTime, isInvalid: acc.isInvalid || false, invalidReason: acc.invalidReason || null, + modelRateLimits: acc.modelRateLimits || {}, lastUsed: acc.lastUsed })), settings: settings, diff --git a/src/cli/accounts.js b/src/cli/accounts.js index eee8013..9a7e3db 100644 --- a/src/cli/accounts.js +++ b/src/cli/accounts.js @@ -138,8 +138,7 @@ function saveAccounts(accounts, settings = {}) { projectId: acc.projectId, addedAt: acc.addedAt || new Date().toISOString(), lastUsed: acc.lastUsed || null, - isRateLimited: acc.isRateLimited || false, - rateLimitResetTime: acc.rateLimitResetTime || null + modelRateLimits: acc.modelRateLimits || {} })), settings: { cooldownDurationMs: 60000, @@ -168,7 +167,11 @@ function displayAccounts(accounts) { console.log(`\n${accounts.length} account(s) saved:`); accounts.forEach((acc, i) => { - const status = acc.isRateLimited ? ' (rate-limited)' : ''; + // Check for any active model-specific rate limits + const hasActiveLimit = Object.values(acc.modelRateLimits || {}).some( + limit => limit.isRateLimited && limit.resetTime > Date.now() + ); + const status = hasActiveLimit ? ' (rate-limited)' : ''; console.log(` ${i + 1}. 
${acc.email}${status}`); }); } @@ -218,8 +221,7 @@ async function addAccount(existingAccounts) { refreshToken: result.refreshToken, projectId: result.projectId, addedAt: new Date().toISOString(), - isRateLimited: false, - rateLimitResetTime: null + modelRateLimits: {} }; } catch (error) { console.error(`\nāœ— Authentication failed: ${error.message}`); @@ -280,7 +282,7 @@ async function interactiveAdd(rl) { if (accounts.length > 0) { displayAccounts(accounts); - const choice = await rl.question('\n(a)dd new, (r)emove existing, or (f)resh start? [a/r/f]: '); + const choice = await rl.question('\n(a)dd new, (r)emove existing, (f)resh start, or (e)xit? [a/r/f/e]: '); const c = choice.toLowerCase(); if (c === 'r') { @@ -291,36 +293,32 @@ async function interactiveAdd(rl) { accounts.length = 0; } else if (c === 'a') { console.log('\nAdding to existing accounts.'); + } else if (c === 'e') { + console.log('\nExiting...'); + return; // Exit cleanly } else { console.log('\nInvalid choice, defaulting to add.'); } } - // Add accounts loop - while (accounts.length < MAX_ACCOUNTS) { - const newAccount = await addAccount(accounts); - if (newAccount) { - accounts.push(newAccount); - // Auto-save after each successful add to prevent data loss - saveAccounts(accounts); - } else if (accounts.length > 0) { - // Even if newAccount is null (duplicate update), save the updated accounts - saveAccounts(accounts); - } + // Add single account + if (accounts.length >= MAX_ACCOUNTS) { + console.log(`\nMaximum of ${MAX_ACCOUNTS} accounts reached.`); + return; + } - if (accounts.length >= MAX_ACCOUNTS) { - console.log(`\nMaximum of ${MAX_ACCOUNTS} accounts reached.`); - break; - } - - const addMore = await rl.question('\nAdd another account? 
[y/N]: '); - if (addMore.toLowerCase() !== 'y') { - break; - } + const newAccount = await addAccount(accounts); + if (newAccount) { + accounts.push(newAccount); + saveAccounts(accounts); + } else if (accounts.length > 0) { + // Even if newAccount is null (duplicate update), save the updated accounts + saveAccounts(accounts); } if (accounts.length > 0) { displayAccounts(accounts); + console.log('\nTo add more accounts, run this command again.'); } else { console.log('\nNo accounts to save.'); } @@ -431,6 +429,8 @@ async function main() { } } finally { rl.close(); + // Force exit to prevent hanging + process.exit(0); } } diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js index 9d64c17..4491afc 100644 --- a/src/cloudcode/message-handler.js +++ b/src/cloudcode/message-handler.js @@ -13,28 +13,12 @@ import { } from '../constants.js'; import { convertGoogleToAnthropic } from '../format/index.js'; import { isRateLimitError, isAuthError } from '../errors.js'; -import { formatDuration, sleep } from '../utils/helpers.js'; +import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js'; import { logger } from '../utils/logger.js'; import { parseResetTime } from './rate-limit-parser.js'; import { buildCloudCodeRequest, buildHeaders } from './request-builder.js'; import { parseThinkingSSEResponse } from './sse-parser.js'; -/** - * Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED) - * @deprecated Use isRateLimitError from errors.js instead - */ -function is429Error(error) { - return isRateLimitError(error); -} - -/** - * Check if an error is an auth-invalid error (credentials need re-authentication) - * @deprecated Use isAuthError from errors.js instead - */ -function isAuthInvalidError(error) { - return isAuthError(error); -} - /** * Send a non-streaming request to Cloud Code with multi-account support * Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks) @@ -59,7 +43,7 @@ export async 
function sendMessage(anthropicRequest, accountManager) { for (let attempt = 0; attempt < maxAttempts; attempt++) { // Use sticky account selection for cache continuity - const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(); + const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model); let account = stickyAccount; // Handle waiting for sticky account @@ -67,19 +51,19 @@ export async function sendMessage(anthropicRequest, accountManager) { logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`); await sleep(waitMs); accountManager.clearExpiredLimits(); - account = accountManager.getCurrentStickyAccount(); + account = accountManager.getCurrentStickyAccount(model); } // Handle all accounts rate-limited if (!account) { - if (accountManager.isAllRateLimited()) { - const allWaitMs = accountManager.getMinWaitTimeMs(); + if (accountManager.isAllRateLimited(model)) { + const allWaitMs = accountManager.getMinWaitTimeMs(model); const resetTime = new Date(Date.now() + allWaitMs).toISOString(); // If wait time is too long (> 2 minutes), throw error immediately if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { throw new Error( - `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` + `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` ); } @@ -88,7 +72,7 @@ export async function sendMessage(anthropicRequest, accountManager) { logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. 
Waiting ${formatDuration(allWaitMs)}...`); await sleep(allWaitMs); accountManager.clearExpiredLimits(); - account = accountManager.pickNext(); + account = accountManager.pickNext(model); } if (!account) { @@ -163,7 +147,7 @@ export async function sendMessage(anthropicRequest, accountManager) { return convertGoogleToAnthropic(data, anthropicRequest.model); } catch (endpointError) { - if (is429Error(endpointError)) { + if (isRateLimitError(endpointError)) { throw endpointError; // Re-throw to trigger account switch } logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message); @@ -176,19 +160,19 @@ export async function sendMessage(anthropicRequest, accountManager) { // If all endpoints returned 429, mark account as rate-limited if (lastError.is429) { logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`); - accountManager.markRateLimited(account.email, lastError.resetMs); + accountManager.markRateLimited(account.email, lastError.resetMs, model); throw new Error(`Rate limited: ${lastError.errorText}`); } throw lastError; } } catch (error) { - if (is429Error(error)) { + if (isRateLimitError(error)) { // Rate limited - already marked, continue to next account logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`); continue; } - if (isAuthInvalidError(error)) { + if (isAuthError(error)) { // Auth invalid - already marked, continue to next account logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`); continue; @@ -197,10 +181,17 @@ export async function sendMessage(anthropicRequest, accountManager) { // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) { logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`); - accountManager.pickNext(); // Force advance to next account + 
accountManager.pickNext(model); // Force advance to next account continue; } + if (isNetworkError(error)) { + logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`); + await sleep(1000); // Brief pause before retry + accountManager.pickNext(model); // Advance to next account + continue; + } + throw error; } } diff --git a/src/cloudcode/model-api.js b/src/cloudcode/model-api.js index 06ce4c1..e8bd9e3 100644 --- a/src/cloudcode/model-api.js +++ b/src/cloudcode/model-api.js @@ -4,9 +4,19 @@ * Handles model listing and quota retrieval from the Cloud Code API. */ -import { ANTIGRAVITY_ENDPOINT_FALLBACKS, ANTIGRAVITY_HEADERS } from '../constants.js'; +import { ANTIGRAVITY_ENDPOINT_FALLBACKS, ANTIGRAVITY_HEADERS, getModelFamily } from '../constants.js'; import { logger } from '../utils/logger.js'; +/** + * Check if a model is supported (Claude or Gemini) + * @param {string} modelId - Model ID to check + * @returns {boolean} True if model is supported + */ +function isSupportedModel(modelId) { + const family = getModelFamily(modelId); + return family === 'claude' || family === 'gemini'; +} + /** * List available models in Anthropic API format * Fetches models dynamically from the Cloud Code API @@ -20,7 +30,9 @@ export async function listModels(token) { return { object: 'list', data: [] }; } - const modelList = Object.entries(data.models).map(([modelId, modelData]) => ({ + const modelList = Object.entries(data.models) + .filter(([modelId]) => isSupportedModel(modelId)) + .map(([modelId, modelData]) => ({ id: modelId, object: 'model', created: Math.floor(Date.now() / 1000), @@ -85,6 +97,9 @@ export async function getModelQuotas(token) { const quotas = {}; for (const [modelId, modelData] of Object.entries(data.models)) { + // Only include Claude and Gemini models + if (!isSupportedModel(modelId)) continue; + if (modelData.quotaInfo) { quotas[modelId] = { remainingFraction: modelData.quotaInfo.remainingFraction ?? 
null, diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js index 06ab484..f3af687 100644 --- a/src/cloudcode/streaming-handler.js +++ b/src/cloudcode/streaming-handler.js @@ -11,27 +11,12 @@ import { MAX_WAIT_BEFORE_ERROR_MS } from '../constants.js'; import { isRateLimitError, isAuthError } from '../errors.js'; -import { formatDuration, sleep } from '../utils/helpers.js'; +import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js'; import { logger } from '../utils/logger.js'; import { parseResetTime } from './rate-limit-parser.js'; import { buildCloudCodeRequest, buildHeaders } from './request-builder.js'; import { streamSSEResponse } from './sse-streamer.js'; -/** - * Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED) - * @deprecated Use isRateLimitError from errors.js instead - */ -function is429Error(error) { - return isRateLimitError(error); -} - -/** - * Check if an error is an auth-invalid error (credentials need re-authentication) - * @deprecated Use isAuthError from errors.js instead - */ -function isAuthInvalidError(error) { - return isAuthError(error); -} /** * Send a streaming request to Cloud Code with multi-account support @@ -56,7 +41,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { for (let attempt = 0; attempt < maxAttempts; attempt++) { // Use sticky account selection for cache continuity - const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(); + const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model); let account = stickyAccount; // Handle waiting for sticky account @@ -64,19 +49,19 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`); await sleep(waitMs); accountManager.clearExpiredLimits(); - account = accountManager.getCurrentStickyAccount(); + account = 
accountManager.getCurrentStickyAccount(model); } // Handle all accounts rate-limited if (!account) { - if (accountManager.isAllRateLimited()) { - const allWaitMs = accountManager.getMinWaitTimeMs(); + if (accountManager.isAllRateLimited(model)) { + const allWaitMs = accountManager.getMinWaitTimeMs(model); const resetTime = new Date(Date.now() + allWaitMs).toISOString(); // If wait time is too long (> 2 minutes), throw error immediately if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { throw new Error( - `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` + `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` ); } @@ -85,7 +70,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`); await sleep(allWaitMs); accountManager.clearExpiredLimits(); - account = accountManager.pickNext(); + account = accountManager.pickNext(model); } if (!account) { @@ -153,7 +138,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { return; } catch (endpointError) { - if (is429Error(endpointError)) { + if (isRateLimitError(endpointError)) { throw endpointError; // Re-throw to trigger account switch } logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message); @@ -166,19 +151,19 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { // If all endpoints returned 429, mark account as rate-limited if (lastError.is429) { logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`); - accountManager.markRateLimited(account.email, lastError.resetMs); + accountManager.markRateLimited(account.email, lastError.resetMs, model); throw new Error(`Rate limited: ${lastError.errorText}`); } throw lastError; } } catch (error) { - if 
(is429Error(error)) { + if (isRateLimitError(error)) { // Rate limited - already marked, continue to next account logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`); continue; } - if (isAuthInvalidError(error)) { + if (isAuthError(error)) { // Auth invalid - already marked, continue to next account logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`); continue; @@ -187,10 +172,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) { logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`); - accountManager.pickNext(); // Force advance to next account + accountManager.pickNext(model); // Force advance to next account continue; } + if (isNetworkError(error)) { + logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... 
(${error.message})`); + await sleep(1000); // Brief pause before retry + accountManager.pickNext(model); // Advance to next account + continue; + } + throw error; } } diff --git a/src/format/content-converter.js b/src/format/content-converter.js index 5c4de48..052eb0a 100644 --- a/src/format/content-converter.js +++ b/src/format/content-converter.js @@ -37,6 +37,8 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo const parts = []; for (const block of content) { + if (!block) continue; + if (block.type === 'text') { // Skip empty text blocks - they cause API errors if (block.text && block.text.trim()) { diff --git a/src/format/request-converter.js b/src/format/request-converter.js index fd6d9d6..17e67ed 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -152,6 +152,16 @@ export function convertAnthropicToGoogle(anthropicRequest) { if (thinkingBudget) { thinkingConfig.thinking_budget = thinkingBudget; logger.debug(`[RequestConverter] Claude thinking enabled with budget: ${thinkingBudget}`); + + // Validate max_tokens > thinking_budget as required by the API + const currentMaxTokens = googleRequest.generationConfig.maxOutputTokens; + if (currentMaxTokens && currentMaxTokens <= thinkingBudget) { + // Bump max_tokens to allow for some response content + // Default to budget + 8192 (standard output buffer) + const adjustedMaxTokens = thinkingBudget + 8192; + logger.warn(`[RequestConverter] max_tokens (${currentMaxTokens}) <= thinking_budget (${thinkingBudget}). 
Adjusting to ${adjustedMaxTokens} to satisfy API requirements`); + googleRequest.generationConfig.maxOutputTokens = adjustedMaxTokens; + } } else { logger.debug('[RequestConverter] Claude thinking enabled (no budget specified)'); } diff --git a/src/index.js b/src/index.js index 3cab6d6..3c71759 100644 --- a/src/index.js +++ b/src/index.js @@ -30,29 +30,47 @@ app.listen(PORT, () => { // Clear console for a clean start console.clear(); + const border = '║'; + // align for 2-space indent (60 chars), align4 for 4-space indent (58 chars) + const align = (text) => text + ' '.repeat(Math.max(0, 60 - text.length)); + const align4 = (text) => text + ' '.repeat(Math.max(0, 58 - text.length)); + + // Build Control section dynamically + let controlSection = '║ Control: ║\n'; + if (!isDebug) { + controlSection += '║ --debug Enable debug logging ║\n'; + } + controlSection += '║ Ctrl+C Stop server ║'; + + // Build status section if debug mode is active + let statusSection = ''; + if (isDebug) { + statusSection = '║ ║\n'; + statusSection += '║ Active Modes: ║\n'; + statusSection += '║ ✓ Debug mode enabled ║\n'; + } + logger.log(` ╔══════════════════════════════════════════════════════════════╗ ║ Antigravity Claude Proxy Server ║ ╠══════════════════════════════════════════════════════════════╣ ║ ║ -║ Server running at: http://localhost:${PORT} ║ -║ ║ -║ Control: ║ -║ --debug Enable debug logging ║ -║ Ctrl+C Stop server ║ +${border} ${align(`Server running at: http://localhost:${PORT}`)}${border} +${statusSection}║ ║ +${controlSection} ║ ║ ║ Endpoints: ║ -║ POST /v1/messages - Anthropic Messages API ║ -║ GET /v1/models - List available models ║ -║ GET /health - Health 
check ā•‘ +ā•‘ GET /account-limits - Account status & quotas ā•‘ +ā•‘ POST /refresh-token - Force token refresh ā•‘ ā•‘ ā•‘ -ā•‘ Configuration: ā•‘ -ā•‘ Storage: ${CONFIG_DIR} ā•‘ +${border} ${align(`Configuration:`)}${border} +${border} ${align4(`Storage: ${CONFIG_DIR}`)}${border} ā•‘ ā•‘ ā•‘ Usage with Claude Code: ā•‘ -ā•‘ export ANTHROPIC_BASE_URL=http://localhost:${PORT} ā•‘ +${border} ${align4(`export ANTHROPIC_BASE_URL=http://localhost:${PORT}`)}${border} ā•‘ export ANTHROPIC_API_KEY=dummy ā•‘ ā•‘ claude ā•‘ ā•‘ ā•‘ diff --git a/src/server.js b/src/server.js index 305062b..f0ccb5d 100644 --- a/src/server.js +++ b/src/server.js @@ -70,8 +70,9 @@ function parseError(error) { statusCode = 400; // Use 400 to ensure client does not retry (429 and 529 trigger retries) // Try to extract the quota reset time from the error - const resetMatch = error.message.match(/quota will reset after (\d+h\d+m\d+s|\d+m\d+s|\d+s)/i); - const modelMatch = error.message.match(/"model":\s*"([^"]+)"/); + const resetMatch = error.message.match(/quota will reset after ([\dh\dm\ds]+)/i); + // Try to extract model from our error format "Rate limited on " or JSON format + const modelMatch = error.message.match(/Rate limited on ([^.]+)\./) || error.message.match(/"model":\s*"([^"]+)"/); const model = modelMatch ? 
modelMatch[1] : 'the model'; if (resetMatch) { @@ -111,22 +112,107 @@ app.use((req, res, next) => { }); /** - * Health check endpoint + * Health check endpoint - Detailed status + * Returns status of all accounts including rate limits and model quotas */ app.get('/health', async (req, res) => { try { await ensureInitialized(); + const start = Date.now(); + + // Get high-level status first const status = accountManager.getStatus(); + const allAccounts = accountManager.getAllAccounts(); + + // Fetch quotas for each account in parallel to get detailed model info + const accountDetails = await Promise.allSettled( + allAccounts.map(async (account) => { + // Check model-specific rate limits + const activeModelLimits = Object.entries(account.modelRateLimits || {}) + .filter(([_, limit]) => limit.isRateLimited && limit.resetTime > Date.now()); + const isRateLimited = activeModelLimits.length > 0; + const soonestReset = activeModelLimits.length > 0 + ? Math.min(...activeModelLimits.map(([_, l]) => l.resetTime)) + : null; + + const baseInfo = { + email: account.email, + lastUsed: account.lastUsed ? new Date(account.lastUsed).toISOString() : null, + modelRateLimits: account.modelRateLimits || {}, + rateLimitCooldownRemaining: soonestReset ? Math.max(0, soonestReset - Date.now()) : 0 + }; + + // Skip invalid accounts for quota check + if (account.isInvalid) { + return { + ...baseInfo, + status: 'invalid', + error: account.invalidReason, + models: {} + }; + } + + try { + const token = await accountManager.getTokenForAccount(account); + const quotas = await getModelQuotas(token); + + // Format quotas for readability + const formattedQuotas = {}; + for (const [modelId, info] of Object.entries(quotas)) { + formattedQuotas[modelId] = { + remaining: info.remainingFraction !== null ? `${Math.round(info.remainingFraction * 100)}%` : 'N/A', + remainingFraction: info.remainingFraction, + resetTime: info.resetTime || null + }; + } + + return { + ...baseInfo, + status: isRateLimited ? 
'rate-limited' : 'ok', + models: formattedQuotas + }; + } catch (error) { + return { + ...baseInfo, + status: 'error', + error: error.message, + models: {} + }; + } + }) + ); + + // Process results + const detailedAccounts = accountDetails.map((result, index) => { + if (result.status === 'fulfilled') { + return result.value; + } else { + const acc = allAccounts[index]; + return { + email: acc.email, + status: 'error', + error: result.reason?.message || 'Unknown error', + modelRateLimits: acc.modelRateLimits || {} + }; + } + }); res.json({ status: 'ok', - accounts: status.summary, - available: status.available, - rateLimited: status.rateLimited, - invalid: status.invalid, - timestamp: new Date().toISOString() + timestamp: new Date().toISOString(), + latencyMs: Date.now() - start, + summary: status.summary, + counts: { + total: status.total, + available: status.available, + rateLimited: status.rateLimited, + invalid: status.invalid + }, + accounts: detailedAccounts }); + } catch (error) { + logger.error('[API] Health check failed:', error); res.status(503).json({ status: 'error', error: error.message, @@ -236,11 +322,21 @@ app.get('/account-limits', async (req, res) => { let accStatus; if (acc.isInvalid) { accStatus = 'invalid'; - } else if (acc.isRateLimited) { - const remaining = acc.rateLimitResetTime ? acc.rateLimitResetTime - Date.now() : 0; - accStatus = remaining > 0 ? 
`limited (${formatDuration(remaining)})` : 'rate-limited'; + } else if (accLimit?.status === 'error') { + accStatus = 'error'; } else { - accStatus = accLimit?.status || 'ok'; + // Count exhausted models (0% or null remaining) + const models = accLimit?.models || {}; + const modelCount = Object.keys(models).length; + const exhaustedCount = Object.values(models).filter( + q => q.remainingFraction === 0 || q.remainingFraction === null + ).length; + + if (exhaustedCount === 0) { + accStatus = 'ok'; + } else { + accStatus = `(${exhaustedCount}/${modelCount}) limited`; + } } // Get reset time from quota API @@ -262,14 +358,14 @@ app.get('/account-limits', async (req, res) => { } lines.push(''); - // Calculate column widths - const modelColWidth = Math.max(25, ...sortedModels.map(m => m.length)) + 2; - const accountColWidth = 22; + // Calculate column widths - need more space for reset time info + const modelColWidth = Math.max(28, ...sortedModels.map(m => m.length)) + 2; + const accountColWidth = 30; // Header row let header = 'Model'.padEnd(modelColWidth); for (const acc of accountLimits) { - const shortEmail = acc.email.split('@')[0].slice(0, 18); + const shortEmail = acc.email.split('@')[0].slice(0, 26); header += shortEmail.padEnd(accountColWidth); } lines.push(header); @@ -281,12 +377,22 @@ app.get('/account-limits', async (req, res) => { for (const acc of accountLimits) { const quota = acc.models?.[modelId]; let cell; - if (acc.status !== 'ok') { + if (acc.status !== 'ok' && acc.status !== 'rate-limited') { cell = `[${acc.status}]`; } else if (!quota) { cell = '-'; - } else if (quota.remainingFraction === null) { - cell = '0% (exhausted)'; + } else if (quota.remainingFraction === 0 || quota.remainingFraction === null) { + // Show reset time for exhausted models + if (quota.resetTime) { + const resetMs = new Date(quota.resetTime).getTime() - Date.now(); + if (resetMs > 0) { + cell = `0% (wait ${formatDuration(resetMs)})`; + } else { + cell = '0% (resetting...)'; + 
} + } else { + cell = '0% (exhausted)'; + } } else { const pct = Math.round(quota.remainingFraction * 100); cell = `${pct}%`; @@ -404,17 +510,17 @@ app.post('/v1/messages/count_tokens', (req, res) => { /** * Main messages endpoint - Anthropic Messages API compatible */ + + +/** + * Anthropic-compatible Messages API + * POST /v1/messages + */ app.post('/v1/messages', async (req, res) => { try { // Ensure account manager is initialized await ensureInitialized(); - // Optimistic Retry: If ALL accounts are rate-limited, reset them to force a fresh check. - // If we have some available accounts, we try them first. - if (accountManager.isAllRateLimited()) { - logger.warn('[Server] All accounts rate-limited. Resetting state for optimistic retry.'); - accountManager.resetAllRateLimits(); - } const { model, @@ -430,6 +536,14 @@ app.post('/v1/messages', async (req, res) => { temperature } = req.body; + // Optimistic Retry: If ALL accounts are rate-limited for this model, reset them to force a fresh check. + // If we have some available accounts, we try them first. + const modelId = model || 'claude-3-5-sonnet-20241022'; + if (accountManager.isAllRateLimited(modelId)) { + logger.warn(`[Server] All accounts rate-limited for ${modelId}. 
Resetting state for optimistic retry.`); + accountManager.resetAllRateLimits(); + } + // Validate required fields if (!messages || !Array.isArray(messages)) { return res.status(400).json({ diff --git a/src/utils/helpers.js b/src/utils/helpers.js index 0279253..396854f 100644 --- a/src/utils/helpers.js +++ b/src/utils/helpers.js @@ -23,6 +23,7 @@ export function formatDuration(ms) { return `${secs}s`; } + /** * Sleep for specified milliseconds * @param {number} ms - Duration to sleep in milliseconds @@ -31,3 +32,49 @@ export function formatDuration(ms) { export function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } + +/** + * Check if an error is a network error (transient) + * @param {Error} error - The error to check + * @returns {boolean} True if it is a network error + */ +export function isNetworkError(error) { + const msg = error.message.toLowerCase(); + return ( + msg.includes('fetch failed') || + msg.includes('network error') || + msg.includes('econnreset') || + msg.includes('etimedout') || + msg.includes('socket hang up') || + msg.includes('timeout') + ); +} + +/** + * Check if an error is an authentication error (permanent until fixed) + * @param {Error} error - The error to check + * @returns {boolean} True if it is an auth error + */ +export function isAuthError(error) { + const msg = error.message.toLowerCase(); + return ( + msg.includes('401') || + msg.includes('unauthenticated') || + msg.includes('invalid_grant') || + msg.includes('invalid_client') + ); +} + +/** + * Check if an error is a rate limit error + * @param {Error} error - The error to check + * @returns {boolean} True if it is a rate limit error + */ +export function isRateLimitError(error) { + const msg = error.message.toLowerCase(); + return ( + msg.includes('429') || + msg.includes('resource_exhausted') || + msg.includes('quota_exhausted') + ); +}