diff --git a/src/account-manager/index.js b/src/account-manager/index.js index 79212ea..8db296d 100644 --- a/src/account-manager/index.js +++ b/src/account-manager/index.js @@ -14,7 +14,8 @@ import { resetAllRateLimits as resetLimits, markRateLimited as markLimited, markInvalid as markAccountInvalid, - getMinWaitTimeMs as getMinWait + getMinWaitTimeMs as getMinWait, + getRateLimitInfo as getLimitInfo } from './rate-limits.js'; import { getTokenForAccount as fetchToken, @@ -214,6 +215,16 @@ export class AccountManager { return getMinWait(this.#accounts, modelId); } + /** + * Get rate limit info for a specific account and model + * @param {string} email - Email of the account + * @param {string} modelId - Model ID to check + * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info + */ + getRateLimitInfo(email, modelId) { + return getLimitInfo(this.#accounts, email, modelId); + } + /** * Get OAuth token for an account * @param {Object} account - Account object with email and credentials diff --git a/src/account-manager/rate-limits.js b/src/account-manager/rate-limits.js index ffe75e6..0dca07a 100644 --- a/src/account-manager/rate-limits.js +++ b/src/account-manager/rate-limits.js @@ -22,6 +22,7 @@ export function isAllRateLimited(accounts, modelId) { return accounts.every(acc => { if (acc.isInvalid) return true; // Invalid accounts count as unavailable + if (acc.enabled === false) return true; // Disabled accounts count as unavailable const modelLimits = acc.modelRateLimits || {}; const limit = modelLimits[modelId]; return limit && limit.isRateLimited && limit.resetTime > Date.now(); @@ -118,18 +119,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) { const account = accounts.find(a => a.email === email); if (!account) return false; - // Use configured cooldown as the maximum wait time - // If API returns a reset time, cap it at DEFAULT_COOLDOWN_MS - // If API doesn't return a reset time, use DEFAULT_COOLDOWN_MS - let cooldownMs; - if (resetMs && resetMs > 0) { - // API provided a reset time - cap it at configured maximum - cooldownMs = Math.min(resetMs, DEFAULT_COOLDOWN_MS); - } else { - // No reset time from API - use configured default - cooldownMs = DEFAULT_COOLDOWN_MS; - } - const resetTime = Date.now() + cooldownMs; + // Store the ACTUAL reset time from the API + // This is used to decide whether to wait (short) or switch accounts (long) + const actualResetMs = (resetMs && resetMs > 0) ? resetMs : DEFAULT_COOLDOWN_MS; if (!account.modelRateLimits) { account.modelRateLimits = {}; @@ -137,12 +129,20 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) { account.modelRateLimits[modelId] = { isRateLimited: true, - resetTime: resetTime + resetTime: Date.now() + actualResetMs, // Actual reset time for decisions + actualResetMs: actualResetMs // Original duration from API }; - logger.warn( - `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(cooldownMs)}` - ); + // Log appropriately based on duration + if (actualResetMs > DEFAULT_COOLDOWN_MS) { + logger.warn( + `[AccountManager] Quota exhausted: ${email} (model: ${modelId}). Resets in ${formatDuration(actualResetMs)}` + ); + } else { + logger.warn( + `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(actualResetMs)}` + ); + } return true; } @@ -209,3 +209,29 @@ export function getMinWaitTimeMs(accounts, modelId) { return minWait === Infinity ? DEFAULT_COOLDOWN_MS : minWait; } + +/** + * Get the rate limit info for a specific account and model + * Returns the actual reset time from API, not capped + * + * @param {Array} accounts - Array of account objects + * @param {string} email - Email of the account + * @param {string} modelId - Model ID to check + * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info + */ +export function getRateLimitInfo(accounts, email, modelId) { + const account = accounts.find(a => a.email === email); + if (!account || !account.modelRateLimits || !account.modelRateLimits[modelId]) { + return { isRateLimited: false, actualResetMs: null, waitMs: 0 }; + } + + const limit = account.modelRateLimits[modelId]; + const now = Date.now(); + const waitMs = limit.resetTime ? Math.max(0, limit.resetTime - now) : 0; + + return { + isRateLimited: limit.isRateLimited && waitMs > 0, + actualResetMs: limit.actualResetMs || null, + waitMs + }; +} diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js index 596229d..70598b3 100644 --- a/src/cloudcode/message-handler.js +++ b/src/cloudcode/message-handler.js @@ -9,6 +9,7 @@ import { ANTIGRAVITY_ENDPOINT_FALLBACKS, MAX_RETRIES, MAX_WAIT_BEFORE_ERROR_MS, + DEFAULT_COOLDOWN_MS, isThinkingModel } from '../constants.js'; import { convertGoogleToAnthropic } from '../format/index.js'; @@ -39,67 +40,56 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab // Retry loop with account failover // Ensure we try at least as many times as there are accounts to cycle through everyone - // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1); for (let attempt = 0; attempt < maxAttempts; attempt++) { - // Use sticky account selection for cache continuity - const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model); - let account = stickyAccount; + // Clear any expired rate limits before picking + accountManager.clearExpiredLimits(); - // Handle waiting for sticky account - if (!account && waitMs > 0) { - logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`); - await sleep(waitMs); - accountManager.clearExpiredLimits(); - account = accountManager.getCurrentStickyAccount(model); - } + // Get available accounts for this model + const availableAccounts = accountManager.getAvailableAccounts(model); - // Handle all accounts rate-limited - if (!account) { + // If no accounts available, check if we should wait or throw error + if (availableAccounts.length === 0) { if (accountManager.isAllRateLimited(model)) { - const allWaitMs = accountManager.getMinWaitTimeMs(model); - const resetTime = new Date(Date.now() + allWaitMs).toISOString(); + const minWaitMs = accountManager.getMinWaitTimeMs(model); + const resetTime = new Date(Date.now() + minWaitMs).toISOString(); // If wait time is too long (> 2 minutes), throw error immediately - if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { + if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { throw new Error( - `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` + `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}` ); } - // Wait for reset (applies to both single and multi-account modes) + // Wait for shortest reset time const accountCount = accountManager.getAccountCount(); - logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`); - await sleep(allWaitMs); - - // Add small buffer after waiting to ensure rate limits have truly expired - await sleep(500); + logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`); + await sleep(minWaitMs + 500); // Add 500ms buffer accountManager.clearExpiredLimits(); - account = accountManager.pickNext(model); - - // If still no account after waiting, try optimistic reset - // This handles cases where the API rate limit is transient - if (!account) { - logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...'); - accountManager.resetAllRateLimits(); - account = accountManager.pickNext(model); - } + continue; // Retry the loop } - if (!account) { - // Check if fallback is enabled and available - if (fallbackEnabled) { - const fallbackModel = getFallbackModel(model); - if (fallbackModel) { - logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`); - // Retry with fallback model - const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; - return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call - } + // Check if fallback is enabled and available + if (fallbackEnabled) { + const fallbackModel = getFallbackModel(model); + if (fallbackModel) { + logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`); + const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; + return await sendMessage(fallbackRequest, accountManager, false); } - throw new Error('No accounts available'); } + throw new Error('No accounts available'); + } + + // Pick sticky account (prefers current for cache continuity) + let account = accountManager.getCurrentStickyAccount(model); + if (!account) { + account = accountManager.pickNext(model); + } + + if (!account) { + continue; // Shouldn't happen, but safety check } try { @@ -112,6 +102,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab // Try each endpoint let lastError = null; + let retriedOnce = false; // Track if we've already retried for short rate limit + for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) { try { const url = isThinking @@ -137,14 +129,51 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab } if (response.status === 429) { - // Rate limited on this endpoint - try next endpoint first (DAILY → PROD) - logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`); const resetMs = parseResetTime(response, errorText); - // Keep minimum reset time across all 429 responses - if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) { - lastError = { is429: true, response, errorText, resetMs }; + + // Decision: wait and retry OR switch account + if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) { + // Long-term quota exhaustion (> 10s) - switch to next account + logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`); + accountManager.markRateLimited(account.email, resetMs, model); + throw new Error(`QUOTA_EXHAUSTED: ${errorText}`); + } else { + // Short-term rate limit (<= 10s) - wait and retry once + const waitMs = resetMs || DEFAULT_COOLDOWN_MS; + + if (!retriedOnce) { + retriedOnce = true; + logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`); + await sleep(waitMs); + // Retry same endpoint + const retryResponse = await fetch(url, { + method: 'POST', + headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'), + body: JSON.stringify(payload) + }); + + if (retryResponse.ok) { + // Process retry response + if (isThinking) { + return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model); + } + const data = await retryResponse.json(); + logger.debug('[CloudCode] Response received after retry'); + return convertGoogleToAnthropic(data, anthropicRequest.model); + } + + // Retry also failed - parse new reset time + const retryErrorText = await retryResponse.text(); + const retryResetMs = parseResetTime(retryResponse, retryErrorText); + logger.warn(`[CloudCode] Retry also failed, marking and switching...`); + accountManager.markRateLimited(account.email, retryResetMs || waitMs, model); + throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`); + } else { + // Already retried once, mark and switch + accountManager.markRateLimited(account.email, waitMs, model); + throw new Error(`RATE_LIMITED: ${errorText}`); + } } - continue; } if (response.status >= 400) { @@ -179,7 +208,6 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab // If all endpoints failed for this account if (lastError) { - // If all endpoints returned 429, mark account as rate-limited if (lastError.is429) { logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`); accountManager.markRateLimited(account.email, lastError.resetMs, model); @@ -199,18 +227,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`); continue; } - // Non-rate-limit error: throw immediately - // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one + // Handle 5xx errors if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) { logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`); - accountManager.pickNext(model); // Force advance to next account + accountManager.pickNext(model); continue; } if (isNetworkError(error)) { logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`); - await sleep(1000); // Brief pause before retry - accountManager.pickNext(model); // Advance to next account + await sleep(1000); + accountManager.pickNext(model); continue; } @@ -224,7 +251,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab if (fallbackModel) { logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`); const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; - return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call + return await sendMessage(fallbackRequest, accountManager, false); } } diff --git a/src/cloudcode/model-api.js b/src/cloudcode/model-api.js index 4ade5ee..35f5e2b 100644 --- a/src/cloudcode/model-api.js +++ b/src/cloudcode/model-api.js @@ -57,22 +57,26 @@ export async function listModels(token) { * Returns model quotas including remaining fraction and reset time * * @param {string} token - OAuth access token + * @param {string} [projectId] - Optional project ID for accurate quota info * @returns {Promise} Raw response from fetchAvailableModels API */ -export async function fetchAvailableModels(token) { +export async function fetchAvailableModels(token, projectId = null) { const headers = { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json', ...ANTIGRAVITY_HEADERS }; + // Include project ID in body for accurate quota info (per Quotio implementation) + const body = projectId ? { project: projectId } : {}; + for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) { try { const url = `${endpoint}/v1internal:fetchAvailableModels`; const response = await fetch(url, { method: 'POST', headers, - body: JSON.stringify({}) + body: JSON.stringify(body) }); if (!response.ok) { @@ -95,10 +99,11 @@ export async function fetchAvailableModels(token) { * Extracts quota info (remaining fraction and reset time) for each model * * @param {string} token - OAuth access token + * @param {string} [projectId] - Optional project ID for accurate quota info * @returns {Promise} Map of modelId -> { remainingFraction, resetTime } */ -export async function getModelQuotas(token) { - const data = await fetchAvailableModels(token); +export async function getModelQuotas(token, projectId = null) { + const data = await fetchAvailableModels(token, projectId); if (!data || !data.models) return {}; const quotas = {}; @@ -108,7 +113,8 @@ export async function getModelQuotas(token) { if (modelData.quotaInfo) { quotas[modelId] = { - remainingFraction: modelData.quotaInfo.remainingFraction ?? null, + // When remainingFraction is missing but resetTime is present, quota is exhausted (0%) + remainingFraction: modelData.quotaInfo.remainingFraction ?? (modelData.quotaInfo.resetTime ? 0 : null), resetTime: modelData.quotaInfo.resetTime ?? null }; } diff --git a/src/cloudcode/rate-limit-parser.js b/src/cloudcode/rate-limit-parser.js index 6e53b15..2fddb26 100644 --- a/src/cloudcode/rate-limit-parser.js +++ b/src/cloudcode/rate-limit-parser.js @@ -78,7 +78,7 @@ export function parseResetTime(responseOrError, errorText = '') { // Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s") // This is Google's preferred format for rate limit reset delay - const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\\d+(?:\\.\\d+)?)(ms|s)/i); + const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\d+(?:\.\d+)?)(ms|s)/i); if (quotaDelayMatch) { const value = parseFloat(quotaDelayMatch[1]); const unit = quotaDelayMatch[2].toLowerCase(); @@ -103,7 +103,7 @@ export function parseResetTime(responseOrError, errorText = '') { // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s") // Added stricter regex to avoid partial matches if (!resetMs) { - const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\\d\\.]+)(?:s\b|s")/i); + const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d.]+)(?:s\b|s")/i); if (secMatch) { resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000); logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`); diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js index 3f0dc55..188c6de 100644 --- a/src/cloudcode/streaming-handler.js +++ b/src/cloudcode/streaming-handler.js @@ -9,7 +9,8 @@ import { ANTIGRAVITY_ENDPOINT_FALLBACKS, MAX_RETRIES, MAX_EMPTY_RESPONSE_RETRIES, - MAX_WAIT_BEFORE_ERROR_MS + MAX_WAIT_BEFORE_ERROR_MS, + DEFAULT_COOLDOWN_MS } from '../constants.js'; import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js'; import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js'; @@ -38,68 +39,57 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb // Retry loop with account failover // Ensure we try at least as many times as there are accounts to cycle through everyone - // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1); for (let attempt = 0; attempt < maxAttempts; attempt++) { - // Use sticky account selection for cache continuity - const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model); - let account = stickyAccount; + // Clear any expired rate limits before picking + accountManager.clearExpiredLimits(); - // Handle waiting for sticky account - if (!account && waitMs > 0) { - logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`); - await sleep(waitMs); - accountManager.clearExpiredLimits(); - account = accountManager.getCurrentStickyAccount(model); - } + // Get available accounts for this model + const availableAccounts = accountManager.getAvailableAccounts(model); - // Handle all accounts rate-limited - if (!account) { + // If no accounts available, check if we should wait or throw error + if (availableAccounts.length === 0) { if (accountManager.isAllRateLimited(model)) { - const allWaitMs = accountManager.getMinWaitTimeMs(model); - const resetTime = new Date(Date.now() + allWaitMs).toISOString(); + const minWaitMs = accountManager.getMinWaitTimeMs(model); + const resetTime = new Date(Date.now() + minWaitMs).toISOString(); // If wait time is too long (> 2 minutes), throw error immediately - if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { + if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) { throw new Error( - `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}` + `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}` ); } - // Wait for reset (applies to both single and multi-account modes) + // Wait for shortest reset time const accountCount = accountManager.getAccountCount(); - logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`); - await sleep(allWaitMs); - - // Add small buffer after waiting to ensure rate limits have truly expired - await sleep(500); + logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`); + await sleep(minWaitMs + 500); // Add 500ms buffer accountManager.clearExpiredLimits(); - account = accountManager.pickNext(model); - - // If still no account after waiting, try optimistic reset - // This handles cases where the API rate limit is transient - if (!account) { - logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...'); - accountManager.resetAllRateLimits(); - account = accountManager.pickNext(model); - } + continue; // Retry the loop } - if (!account) { - // Check if fallback is enabled and available - if (fallbackEnabled) { - const fallbackModel = getFallbackModel(model); - if (fallbackModel) { - logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`); - // Retry with fallback model - const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; - yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call - return; - } + // Check if fallback is enabled and available + if (fallbackEnabled) { + const fallbackModel = getFallbackModel(model); + if (fallbackModel) { + logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`); + const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; + yield* sendMessageStream(fallbackRequest, accountManager, false); + return; } - throw new Error('No accounts available'); } + throw new Error('No accounts available'); + } + + // Pick sticky account (prefers current for cache continuity) + let account = accountManager.getCurrentStickyAccount(model); + if (!account) { + account = accountManager.pickNext(model); + } + + if (!account) { + continue; // Shouldn't happen, but safety check } try { @@ -112,6 +102,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb // Try each endpoint for streaming let lastError = null; + let retriedOnce = false; // Track if we've already retried for short rate limit + for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) { try { const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`; @@ -134,14 +126,48 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb } if (response.status === 429) { - // Rate limited on this endpoint - try next endpoint first (DAILY → PROD) - logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`); const resetMs = parseResetTime(response, errorText); - // Keep minimum reset time across all 429 responses - if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) { - lastError = { is429: true, response, errorText, resetMs }; + + // Decision: wait and retry OR switch account + if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) { + // Long-term quota exhaustion (> 10s) - switch to next account + logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`); + accountManager.markRateLimited(account.email, resetMs, model); + throw new Error(`QUOTA_EXHAUSTED: ${errorText}`); + } else { + // Short-term rate limit (<= 10s) - wait and retry once + const waitMs = resetMs || DEFAULT_COOLDOWN_MS; + + if (!retriedOnce) { + retriedOnce = true; + logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`); + await sleep(waitMs); + // Retry same endpoint + const retryResponse = await fetch(url, { + method: 'POST', + headers: buildHeaders(token, model, 'text/event-stream'), + body: JSON.stringify(payload) + }); + + if (retryResponse.ok) { + // Stream the retry response + yield* streamSSEResponse(retryResponse, anthropicRequest.model); + logger.debug('[CloudCode] Stream completed after retry'); + return; + } + + // Retry also failed - parse new reset time + const retryErrorText = await retryResponse.text(); + const retryResetMs = parseResetTime(retryResponse, retryErrorText); + logger.warn(`[CloudCode] Retry also failed, marking and switching...`); + accountManager.markRateLimited(account.email, retryResetMs || waitMs, model); + throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`); + } else { + // Already retried once, mark and switch + accountManager.markRateLimited(account.email, waitMs, model); + throw new Error(`RATE_LIMITED: ${errorText}`); + } } - continue; } lastError = new Error(`API error ${response.status}: ${errorText}`); @@ -156,7 +182,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb } // Stream the response with retry logic for empty responses - // Uses a for-loop for clearer retry semantics let currentResponse = response; for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) { @@ -207,28 +232,22 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`); } - // For 5xx errors, don't pass to streamer - just continue to next retry + // For 5xx errors, continue retrying if (currentResponse.status >= 500) { logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`); - // Don't continue here - let the loop increment and refetch - // Set currentResponse to null to force refetch at loop start - emptyRetries--; // Compensate for loop increment since we didn't actually try await sleep(1000); - // Refetch immediately for 5xx currentResponse = await fetch(url, { method: 'POST', headers: buildHeaders(token, model, 'text/event-stream'), body: JSON.stringify(payload) }); if (currentResponse.ok) { - continue; // Try streaming with new response + continue; } - // If still failing, let it fall through to throw } throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`); } - // Response is OK, loop will continue to try streamSSEResponse } } @@ -237,7 +256,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb throw endpointError; // Re-throw to trigger account switch } if (isEmptyResponseError(endpointError)) { - throw endpointError; // Re-throw empty response errors to outer handler + throw endpointError; } logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message); lastError = endpointError; @@ -246,7 +265,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb // If all endpoints failed for this account if (lastError) { - // If all endpoints returned 429, mark account as rate-limited if (lastError.is429) { logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`); accountManager.markRateLimited(account.email, lastError.resetMs, model); @@ -266,18 +284,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`); continue; } - // Non-rate-limit error: throw immediately - // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one + // Handle 5xx errors if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) { logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`); - accountManager.pickNext(model); // Force advance to next account + accountManager.pickNext(model); continue; } if (isNetworkError(error)) { logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`); - await sleep(1000); // Brief pause before retry - accountManager.pickNext(model); // Advance to next account + await sleep(1000); + accountManager.pickNext(model); continue; } @@ -291,7 +308,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb if (fallbackModel) { logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`); const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; - yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call + yield* sendMessageStream(fallbackRequest, accountManager, false); return; } } diff --git a/src/constants.js b/src/constants.js index 022ea48..8fadc89 100644 --- a/src/constants.js +++ b/src/constants.js @@ -69,15 +69,16 @@ export const ONBOARD_USER_ENDPOINTS = ANTIGRAVITY_ENDPOINT_FALLBACKS; // Hybrid headers specifically for loadCodeAssist // Uses google-api-nodejs-client User-Agent (required for project discovery on some accounts) -export const LOAD_CODE_ASSIST_HEADERS = { - 'User-Agent': 'google-api-nodejs-client/9.15.1', - 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1', - 'Client-Metadata': JSON.stringify({ - ideType: 'IDE_UNSPECIFIED', - platform: 'PLATFORM_UNSPECIFIED', - pluginType: 'GEMINI' - }) -}; +// export const LOAD_CODE_ASSIST_HEADERS = { +// 'User-Agent': 'google-api-nodejs-client/9.15.1', +// 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1', +// 'Client-Metadata': JSON.stringify({ +// ideType: 'IDE_UNSPECIFIED', +// platform: 'PLATFORM_UNSPECIFIED', +// pluginType: 'GEMINI' +// }) +// }; +export const LOAD_CODE_ASSIST_HEADERS = ANTIGRAVITY_HEADERS; // Default project ID if none can be discovered export const DEFAULT_PROJECT_ID = 'rising-fact-p41fc'; diff --git a/src/server.js b/src/server.js index c498da5..43e2ccb 100644 --- a/src/server.js +++ b/src/server.js @@ -214,7 +214,8 @@ app.get('/health', async (req, res) => { try { const token = await accountManager.getTokenForAccount(account); - const quotas = await getModelQuotas(token); + const projectId = account.subscription?.projectId || null; + const quotas = await getModelQuotas(token, projectId); // Format quotas for readability const formattedQuotas = {}; @@ -309,11 +310,11 @@ app.get('/account-limits', async (req, res) => { try { const token = await accountManager.getTokenForAccount(account); - // Fetch both quotas and subscription tier in parallel - const [quotas, subscription] = await Promise.all([ - getModelQuotas(token), - getSubscriptionTier(token) - ]); + // Fetch subscription tier first to get project ID + const subscription = await getSubscriptionTier(token); + + // Then fetch quotas with project ID for accurate quota info + const quotas = await getModelQuotas(token, subscription.projectId); // Update account object with fresh data account.subscription = {