refactor: Reorganize src/ into modular folder structure
Split large monolithic files into focused modules: - cloudcode-client.js (1,107 lines) → src/cloudcode/ (9 files) - account-manager.js (639 lines) → src/account-manager/ (5 files) - Move auth files to src/auth/ (oauth, token-extractor, database) - Move CLI to src/cli/accounts.js Update all import paths and documentation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
28
src/cloudcode/index.js
Normal file
28
src/cloudcode/index.js
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Cloud Code Client for Antigravity
|
||||
*
|
||||
* Communicates with Google's Cloud Code internal API using the
|
||||
* v1internal:streamGenerateContent endpoint with proper request wrapping.
|
||||
*
|
||||
* Supports multi-account load balancing with automatic failover.
|
||||
*
|
||||
* Based on: https://github.com/NoeFabris/opencode-antigravity-auth
|
||||
*/
|
||||
|
||||
// Re-export public API
|
||||
export { sendMessage } from './message-handler.js';
|
||||
export { sendMessageStream } from './streaming-handler.js';
|
||||
export { listModels, fetchAvailableModels, getModelQuotas } from './model-api.js';
|
||||
|
||||
// Default export for backwards compatibility
|
||||
import { sendMessage } from './message-handler.js';
|
||||
import { sendMessageStream } from './streaming-handler.js';
|
||||
import { listModels, fetchAvailableModels, getModelQuotas } from './model-api.js';
|
||||
|
||||
export default {
|
||||
sendMessage,
|
||||
sendMessageStream,
|
||||
listModels,
|
||||
fetchAvailableModels,
|
||||
getModelQuotas
|
||||
};
|
||||
209
src/cloudcode/message-handler.js
Normal file
209
src/cloudcode/message-handler.js
Normal file
@@ -0,0 +1,209 @@
|
||||
/**
|
||||
* Message Handler for Cloud Code
|
||||
*
|
||||
* Handles non-streaming message requests with multi-account support,
|
||||
* retry logic, and endpoint failover.
|
||||
*/
|
||||
|
||||
import {
|
||||
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
||||
MAX_RETRIES,
|
||||
MAX_WAIT_BEFORE_ERROR_MS,
|
||||
isThinkingModel
|
||||
} from '../constants.js';
|
||||
import { convertGoogleToAnthropic } from '../format/index.js';
|
||||
import { isRateLimitError, isAuthError } from '../errors.js';
|
||||
import { formatDuration, sleep } from '../utils/helpers.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { parseResetTime } from './rate-limit-parser.js';
|
||||
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
||||
import { parseThinkingSSEResponse } from './sse-parser.js';
|
||||
|
||||
/**
|
||||
* Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED)
|
||||
* @deprecated Use isRateLimitError from errors.js instead
|
||||
*/
|
||||
function is429Error(error) {
|
||||
return isRateLimitError(error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an error is an auth-invalid error (credentials need re-authentication)
|
||||
* @deprecated Use isAuthError from errors.js instead
|
||||
*/
|
||||
function isAuthInvalidError(error) {
|
||||
return isAuthError(error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a non-streaming request to Cloud Code with multi-account support
|
||||
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
|
||||
*
|
||||
* @param {Object} anthropicRequest - The Anthropic-format request
|
||||
* @param {Object} anthropicRequest.model - Model name to use
|
||||
* @param {Array} anthropicRequest.messages - Array of message objects
|
||||
* @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
|
||||
* @param {Object} [anthropicRequest.thinking] - Thinking configuration
|
||||
* @param {import('../account-manager/index.js').default} accountManager - The account manager instance
|
||||
* @returns {Promise<Object>} Anthropic-format response object
|
||||
* @throws {Error} If max retries exceeded or no accounts available
|
||||
*/
|
||||
export async function sendMessage(anthropicRequest, accountManager) {
|
||||
const model = anthropicRequest.model;
|
||||
const isThinking = isThinkingModel(model);
|
||||
|
||||
// Retry loop with account failover
|
||||
// Ensure we try at least as many times as there are accounts to cycle through everyone
|
||||
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
|
||||
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
// Use sticky account selection for cache continuity
|
||||
const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
|
||||
let account = stickyAccount;
|
||||
|
||||
// Handle waiting for sticky account
|
||||
if (!account && waitMs > 0) {
|
||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
|
||||
await sleep(waitMs);
|
||||
accountManager.clearExpiredLimits();
|
||||
account = accountManager.getCurrentStickyAccount();
|
||||
}
|
||||
|
||||
// Handle all accounts rate-limited
|
||||
if (!account) {
|
||||
if (accountManager.isAllRateLimited()) {
|
||||
const allWaitMs = accountManager.getMinWaitTimeMs();
|
||||
const resetTime = new Date(Date.now() + allWaitMs).toISOString();
|
||||
|
||||
// If wait time is too long (> 2 minutes), throw error immediately
|
||||
if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
||||
throw new Error(
|
||||
`RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for reset (applies to both single and multi-account modes)
|
||||
const accountCount = accountManager.getAccountCount();
|
||||
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
|
||||
await sleep(allWaitMs);
|
||||
accountManager.clearExpiredLimits();
|
||||
account = accountManager.pickNext();
|
||||
}
|
||||
|
||||
if (!account) {
|
||||
throw new Error('No accounts available');
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// Get token and project for this account
|
||||
const token = await accountManager.getTokenForAccount(account);
|
||||
const project = await accountManager.getProjectForAccount(account, token);
|
||||
const payload = buildCloudCodeRequest(anthropicRequest, project);
|
||||
|
||||
logger.debug(`[CloudCode] Sending request for model: ${model}`);
|
||||
|
||||
// Try each endpoint
|
||||
let lastError = null;
|
||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
||||
try {
|
||||
const url = isThinking
|
||||
? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
|
||||
: `${endpoint}/v1internal:generateContent`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||
|
||||
if (response.status === 401) {
|
||||
// Auth error - clear caches and retry with fresh token
|
||||
logger.warn('[CloudCode] Auth error, refreshing token...');
|
||||
accountManager.clearTokenCache(account.email);
|
||||
accountManager.clearProjectCache(account.email);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (response.status === 429) {
|
||||
// Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
|
||||
logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
|
||||
const resetMs = parseResetTime(response, errorText);
|
||||
// Keep minimum reset time across all 429 responses
|
||||
if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
|
||||
lastError = { is429: true, response, errorText, resetMs };
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (response.status >= 400) {
|
||||
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
||||
// If it's a 5xx error, wait a bit before trying the next endpoint
|
||||
if (response.status >= 500) {
|
||||
logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
|
||||
await sleep(1000);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// For thinking models, parse SSE and accumulate all parts
|
||||
if (isThinking) {
|
||||
return await parseThinkingSSEResponse(response, anthropicRequest.model);
|
||||
}
|
||||
|
||||
// Non-thinking models use regular JSON
|
||||
const data = await response.json();
|
||||
logger.debug('[CloudCode] Response received');
|
||||
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
||||
|
||||
} catch (endpointError) {
|
||||
if (is429Error(endpointError)) {
|
||||
throw endpointError; // Re-throw to trigger account switch
|
||||
}
|
||||
logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
|
||||
lastError = endpointError;
|
||||
}
|
||||
}
|
||||
|
||||
// If all endpoints failed for this account
|
||||
if (lastError) {
|
||||
// If all endpoints returned 429, mark account as rate-limited
|
||||
if (lastError.is429) {
|
||||
logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
|
||||
accountManager.markRateLimited(account.email, lastError.resetMs);
|
||||
throw new Error(`Rate limited: ${lastError.errorText}`);
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
if (is429Error(error)) {
|
||||
// Rate limited - already marked, continue to next account
|
||||
logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
|
||||
continue;
|
||||
}
|
||||
if (isAuthInvalidError(error)) {
|
||||
// Auth invalid - already marked, continue to next account
|
||||
logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
|
||||
continue;
|
||||
}
|
||||
// Non-rate-limit error: throw immediately
|
||||
// UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
|
||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
|
||||
accountManager.pickNext(); // Force advance to next account
|
||||
continue;
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Max retries exceeded');
|
||||
}
|
||||
97
src/cloudcode/model-api.js
Normal file
97
src/cloudcode/model-api.js
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Model API for Cloud Code
|
||||
*
|
||||
* Handles model listing and quota retrieval from the Cloud Code API.
|
||||
*/
|
||||
|
||||
import { ANTIGRAVITY_ENDPOINT_FALLBACKS, ANTIGRAVITY_HEADERS } from '../constants.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
* List available models in Anthropic API format
|
||||
* Fetches models dynamically from the Cloud Code API
|
||||
*
|
||||
* @param {string} token - OAuth access token
|
||||
* @returns {Promise<{object: string, data: Array<{id: string, object: string, created: number, owned_by: string, description: string}>}>} List of available models
|
||||
*/
|
||||
export async function listModels(token) {
|
||||
const data = await fetchAvailableModels(token);
|
||||
if (!data || !data.models) {
|
||||
return { object: 'list', data: [] };
|
||||
}
|
||||
|
||||
const modelList = Object.entries(data.models).map(([modelId, modelData]) => ({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
owned_by: 'anthropic',
|
||||
description: modelData.displayName || modelId
|
||||
}));
|
||||
|
||||
return {
|
||||
object: 'list',
|
||||
data: modelList
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch available models with quota info from Cloud Code API
|
||||
* Returns model quotas including remaining fraction and reset time
|
||||
*
|
||||
* @param {string} token - OAuth access token
|
||||
* @returns {Promise<Object>} Raw response from fetchAvailableModels API
|
||||
*/
|
||||
export async function fetchAvailableModels(token) {
|
||||
const headers = {
|
||||
'Authorization': `Bearer ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
...ANTIGRAVITY_HEADERS
|
||||
};
|
||||
|
||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
||||
try {
|
||||
const url = `${endpoint}/v1internal:fetchAvailableModels`;
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.warn(`[CloudCode] fetchAvailableModels error at ${endpoint}: ${response.status}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
logger.warn(`[CloudCode] fetchAvailableModels failed at ${endpoint}:`, error.message);
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Failed to fetch available models from all endpoints');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get model quotas for an account
|
||||
* Extracts quota info (remaining fraction and reset time) for each model
|
||||
*
|
||||
* @param {string} token - OAuth access token
|
||||
* @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime }
|
||||
*/
|
||||
export async function getModelQuotas(token) {
|
||||
const data = await fetchAvailableModels(token);
|
||||
if (!data || !data.models) return {};
|
||||
|
||||
const quotas = {};
|
||||
for (const [modelId, modelData] of Object.entries(data.models)) {
|
||||
if (modelData.quotaInfo) {
|
||||
quotas[modelId] = {
|
||||
remainingFraction: modelData.quotaInfo.remainingFraction ?? null,
|
||||
resetTime: modelData.quotaInfo.resetTime ?? null
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return quotas;
|
||||
}
|
||||
181
src/cloudcode/rate-limit-parser.js
Normal file
181
src/cloudcode/rate-limit-parser.js
Normal file
@@ -0,0 +1,181 @@
|
||||
/**
|
||||
* Rate Limit Parser for Cloud Code
|
||||
*
|
||||
* Parses reset times from HTTP headers and error messages.
|
||||
* Supports various formats: Retry-After, x-ratelimit-reset,
|
||||
* quotaResetDelay, quotaResetTimeStamp, and duration strings.
|
||||
*/
|
||||
|
||||
import { formatDuration } from '../utils/helpers.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
* Parse reset time from HTTP response or error
|
||||
* Checks headers first, then error message body
|
||||
* Returns milliseconds or null if not found
|
||||
*
|
||||
* @param {Response|Error} responseOrError - HTTP Response object or Error
|
||||
* @param {string} errorText - Optional error body text
|
||||
*/
|
||||
export function parseResetTime(responseOrError, errorText = '') {
|
||||
let resetMs = null;
|
||||
|
||||
// If it's a Response object, check headers first
|
||||
if (responseOrError && typeof responseOrError.headers?.get === 'function') {
|
||||
const headers = responseOrError.headers;
|
||||
|
||||
// Standard Retry-After header (seconds or HTTP date)
|
||||
const retryAfter = headers.get('retry-after');
|
||||
if (retryAfter) {
|
||||
const seconds = parseInt(retryAfter, 10);
|
||||
if (!isNaN(seconds)) {
|
||||
resetMs = seconds * 1000;
|
||||
logger.debug(`[CloudCode] Retry-After header: ${seconds}s`);
|
||||
} else {
|
||||
// Try parsing as HTTP date
|
||||
const date = new Date(retryAfter);
|
||||
if (!isNaN(date.getTime())) {
|
||||
resetMs = date.getTime() - Date.now();
|
||||
if (resetMs > 0) {
|
||||
logger.debug(`[CloudCode] Retry-After date: ${retryAfter}`);
|
||||
} else {
|
||||
resetMs = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// x-ratelimit-reset (Unix timestamp in seconds)
|
||||
if (!resetMs) {
|
||||
const ratelimitReset = headers.get('x-ratelimit-reset');
|
||||
if (ratelimitReset) {
|
||||
const resetTimestamp = parseInt(ratelimitReset, 10) * 1000;
|
||||
resetMs = resetTimestamp - Date.now();
|
||||
if (resetMs > 0) {
|
||||
logger.debug(`[CloudCode] x-ratelimit-reset: ${new Date(resetTimestamp).toISOString()}`);
|
||||
} else {
|
||||
resetMs = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// x-ratelimit-reset-after (seconds)
|
||||
if (!resetMs) {
|
||||
const resetAfter = headers.get('x-ratelimit-reset-after');
|
||||
if (resetAfter) {
|
||||
const seconds = parseInt(resetAfter, 10);
|
||||
if (!isNaN(seconds) && seconds > 0) {
|
||||
resetMs = seconds * 1000;
|
||||
logger.debug(`[CloudCode] x-ratelimit-reset-after: ${seconds}s`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no header found, try parsing from error message/body
|
||||
if (!resetMs) {
|
||||
const msg = (responseOrError instanceof Error ? responseOrError.message : errorText) || '';
|
||||
|
||||
// Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s")
|
||||
// This is Google's preferred format for rate limit reset delay
|
||||
const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\\d+(?:\\.\\d+)?)(ms|s)/i);
|
||||
if (quotaDelayMatch) {
|
||||
const value = parseFloat(quotaDelayMatch[1]);
|
||||
const unit = quotaDelayMatch[2].toLowerCase();
|
||||
resetMs = unit === 's' ? Math.ceil(value * 1000) : Math.ceil(value);
|
||||
logger.debug(`[CloudCode] Parsed quotaResetDelay from body: ${resetMs}ms`);
|
||||
}
|
||||
|
||||
// Try to extract "quotaResetTimeStamp" (ISO format like "2025-12-31T07:00:47Z")
|
||||
if (!resetMs) {
|
||||
const quotaTimestampMatch = msg.match(/quotaResetTimeStamp[:\s"]+(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)/i);
|
||||
if (quotaTimestampMatch) {
|
||||
const resetTime = new Date(quotaTimestampMatch[1]).getTime();
|
||||
if (!isNaN(resetTime)) {
|
||||
resetMs = resetTime - Date.now();
|
||||
// Even if expired or 0, we found a timestamp, so rely on it.
|
||||
// But if it's negative, it means "now", so treat as small wait.
|
||||
logger.debug(`[CloudCode] Parsed quotaResetTimeStamp: ${quotaTimestampMatch[1]} (Delta: ${resetMs}ms)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
|
||||
// Added stricter regex to avoid partial matches
|
||||
if (!resetMs) {
|
||||
const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\\d\\.]+)(?:s\b|s")/i);
|
||||
if (secMatch) {
|
||||
resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
|
||||
logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!resetMs) {
|
||||
// Check for ms (explicit "ms" suffix or implicit if no suffix)
|
||||
const msMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+(\d+)(?:\s*ms)?(?![\w.])/i);
|
||||
if (msMatch) {
|
||||
resetMs = parseInt(msMatch[1], 10);
|
||||
logger.debug(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to extract seconds value like "retry after 60 seconds"
|
||||
if (!resetMs) {
|
||||
const secMatch = msg.match(/retry\s+(?:after\s+)?(\d+)\s*(?:sec|s\b)/i);
|
||||
if (secMatch) {
|
||||
resetMs = parseInt(secMatch[1], 10) * 1000;
|
||||
logger.debug(`[CloudCode] Parsed retry seconds from body: ${secMatch[1]}s`);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to extract duration like "1h23m45s" or "23m45s" or "45s"
|
||||
if (!resetMs) {
|
||||
const durationMatch = msg.match(/(\d+)h(\d+)m(\d+)s|(\d+)m(\d+)s|(\d+)s/i);
|
||||
if (durationMatch) {
|
||||
if (durationMatch[1]) {
|
||||
const hours = parseInt(durationMatch[1], 10);
|
||||
const minutes = parseInt(durationMatch[2], 10);
|
||||
const seconds = parseInt(durationMatch[3], 10);
|
||||
resetMs = (hours * 3600 + minutes * 60 + seconds) * 1000;
|
||||
} else if (durationMatch[4]) {
|
||||
const minutes = parseInt(durationMatch[4], 10);
|
||||
const seconds = parseInt(durationMatch[5], 10);
|
||||
resetMs = (minutes * 60 + seconds) * 1000;
|
||||
} else if (durationMatch[6]) {
|
||||
resetMs = parseInt(durationMatch[6], 10) * 1000;
|
||||
}
|
||||
if (resetMs) {
|
||||
logger.debug(`[CloudCode] Parsed duration from body: ${formatDuration(resetMs)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to extract ISO timestamp or Unix timestamp
|
||||
if (!resetMs) {
|
||||
const isoMatch = msg.match(/reset[:\s"]+(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)/i);
|
||||
if (isoMatch) {
|
||||
const resetTime = new Date(isoMatch[1]).getTime();
|
||||
if (!isNaN(resetTime)) {
|
||||
resetMs = resetTime - Date.now();
|
||||
if (resetMs > 0) {
|
||||
logger.debug(`[CloudCode] Parsed ISO reset time: ${isoMatch[1]}`);
|
||||
} else {
|
||||
resetMs = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SANITY CHECK: Enforce strict minimums for found rate limits
|
||||
// If we found a reset time, but it's very small (e.g. < 1s) or negative,
|
||||
// explicitly bump it up to avoid "Available in 0s" loops.
|
||||
if (resetMs !== null) {
|
||||
if (resetMs < 1000) {
|
||||
logger.debug(`[CloudCode] Reset time too small (${resetMs}ms), enforcing 2s buffer`);
|
||||
resetMs = 2000;
|
||||
}
|
||||
}
|
||||
|
||||
return resetMs;
|
||||
}
|
||||
68
src/cloudcode/request-builder.js
Normal file
68
src/cloudcode/request-builder.js
Normal file
@@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Request Builder for Cloud Code
|
||||
*
|
||||
* Builds request payloads and headers for the Cloud Code API.
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import {
|
||||
ANTIGRAVITY_HEADERS,
|
||||
getModelFamily,
|
||||
isThinkingModel
|
||||
} from '../constants.js';
|
||||
import { convertAnthropicToGoogle } from '../format/index.js';
|
||||
import { deriveSessionId } from './session-manager.js';
|
||||
|
||||
/**
|
||||
* Build the wrapped request body for Cloud Code API
|
||||
*
|
||||
* @param {Object} anthropicRequest - The Anthropic-format request
|
||||
* @param {string} projectId - The project ID to use
|
||||
* @returns {Object} The Cloud Code API request payload
|
||||
*/
|
||||
export function buildCloudCodeRequest(anthropicRequest, projectId) {
|
||||
const model = anthropicRequest.model;
|
||||
const googleRequest = convertAnthropicToGoogle(anthropicRequest);
|
||||
|
||||
// Use stable session ID derived from first user message for cache continuity
|
||||
googleRequest.sessionId = deriveSessionId(anthropicRequest);
|
||||
|
||||
const payload = {
|
||||
project: projectId,
|
||||
model: model,
|
||||
request: googleRequest,
|
||||
userAgent: 'antigravity',
|
||||
requestId: 'agent-' + crypto.randomUUID()
|
||||
};
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build headers for Cloud Code API requests
|
||||
*
|
||||
* @param {string} token - OAuth access token
|
||||
* @param {string} model - Model name
|
||||
* @param {string} accept - Accept header value (default: 'application/json')
|
||||
* @returns {Object} Headers object
|
||||
*/
|
||||
export function buildHeaders(token, model, accept = 'application/json') {
|
||||
const headers = {
|
||||
'Authorization': `Bearer ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
...ANTIGRAVITY_HEADERS
|
||||
};
|
||||
|
||||
const modelFamily = getModelFamily(model);
|
||||
|
||||
// Add interleaved thinking header only for Claude thinking models
|
||||
if (modelFamily === 'claude' && isThinkingModel(model)) {
|
||||
headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14';
|
||||
}
|
||||
|
||||
if (accept !== 'application/json') {
|
||||
headers['Accept'] = accept;
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
47
src/cloudcode/session-manager.js
Normal file
47
src/cloudcode/session-manager.js
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* Session Management for Cloud Code
|
||||
*
|
||||
* Handles session ID derivation for prompt caching continuity.
|
||||
* Session IDs are derived from the first user message to ensure
|
||||
* the same conversation uses the same session across turns.
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
|
||||
* Derive a stable session ID from the first user message in the conversation.
|
||||
* This ensures the same conversation uses the same session ID across turns,
|
||||
* enabling prompt caching (cache is scoped to session + organization).
|
||||
*
|
||||
* @param {Object} anthropicRequest - The Anthropic-format request
|
||||
* @returns {string} A stable session ID (32 hex characters) or random UUID if no user message
|
||||
*/
|
||||
export function deriveSessionId(anthropicRequest) {
|
||||
const messages = anthropicRequest.messages || [];
|
||||
|
||||
// Find the first user message
|
||||
for (const msg of messages) {
|
||||
if (msg.role === 'user') {
|
||||
let content = '';
|
||||
|
||||
if (typeof msg.content === 'string') {
|
||||
content = msg.content;
|
||||
} else if (Array.isArray(msg.content)) {
|
||||
// Extract text from content blocks
|
||||
content = msg.content
|
||||
.filter(block => block.type === 'text' && block.text)
|
||||
.map(block => block.text)
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
if (content) {
|
||||
// Hash the content with SHA256, return first 32 hex chars
|
||||
const hash = crypto.createHash('sha256').update(content).digest('hex');
|
||||
return hash.substring(0, 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to random UUID if no user message found
|
||||
return crypto.randomUUID();
|
||||
}
|
||||
116
src/cloudcode/sse-parser.js
Normal file
116
src/cloudcode/sse-parser.js
Normal file
@@ -0,0 +1,116 @@
|
||||
/**
|
||||
* SSE Parser for Cloud Code
|
||||
*
|
||||
* Parses SSE responses for non-streaming thinking models.
|
||||
* Accumulates all parts and returns a single response.
|
||||
*/
|
||||
|
||||
import { convertGoogleToAnthropic } from '../format/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
* Parse SSE response for thinking models and accumulate all parts
|
||||
*
|
||||
* @param {Response} response - The HTTP response with SSE body
|
||||
* @param {string} originalModel - The original model name
|
||||
* @returns {Promise<Object>} Anthropic-format response object
|
||||
*/
|
||||
export async function parseThinkingSSEResponse(response, originalModel) {
|
||||
let accumulatedThinkingText = '';
|
||||
let accumulatedThinkingSignature = '';
|
||||
let accumulatedText = '';
|
||||
const finalParts = [];
|
||||
let usageMetadata = {};
|
||||
let finishReason = 'STOP';
|
||||
|
||||
const flushThinking = () => {
|
||||
if (accumulatedThinkingText) {
|
||||
finalParts.push({
|
||||
thought: true,
|
||||
text: accumulatedThinkingText,
|
||||
thoughtSignature: accumulatedThinkingSignature
|
||||
});
|
||||
accumulatedThinkingText = '';
|
||||
accumulatedThinkingSignature = '';
|
||||
}
|
||||
};
|
||||
|
||||
const flushText = () => {
|
||||
if (accumulatedText) {
|
||||
finalParts.push({ text: accumulatedText });
|
||||
accumulatedText = '';
|
||||
}
|
||||
};
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || '';
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
const jsonText = line.slice(5).trim();
|
||||
if (!jsonText) continue;
|
||||
|
||||
try {
|
||||
const data = JSON.parse(jsonText);
|
||||
const innerResponse = data.response || data;
|
||||
|
||||
if (innerResponse.usageMetadata) {
|
||||
usageMetadata = innerResponse.usageMetadata;
|
||||
}
|
||||
|
||||
const candidates = innerResponse.candidates || [];
|
||||
const firstCandidate = candidates[0] || {};
|
||||
if (firstCandidate.finishReason) {
|
||||
finishReason = firstCandidate.finishReason;
|
||||
}
|
||||
|
||||
const parts = firstCandidate.content?.parts || [];
|
||||
for (const part of parts) {
|
||||
if (part.thought === true) {
|
||||
flushText();
|
||||
accumulatedThinkingText += (part.text || '');
|
||||
if (part.thoughtSignature) {
|
||||
accumulatedThinkingSignature = part.thoughtSignature;
|
||||
}
|
||||
} else if (part.functionCall) {
|
||||
flushThinking();
|
||||
flushText();
|
||||
finalParts.push(part);
|
||||
} else if (part.text !== undefined) {
|
||||
if (!part.text) continue;
|
||||
flushThinking();
|
||||
accumulatedText += part.text;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
logger.debug('[CloudCode] SSE parse warning:', e.message, 'Raw:', jsonText.slice(0, 100));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
flushThinking();
|
||||
flushText();
|
||||
|
||||
const accumulatedResponse = {
|
||||
candidates: [{ content: { parts: finalParts }, finishReason }],
|
||||
usageMetadata
|
||||
};
|
||||
|
||||
const partTypes = finalParts.map(p => p.thought ? 'thought' : (p.functionCall ? 'functionCall' : 'text'));
|
||||
logger.debug('[CloudCode] Response received (SSE), part types:', partTypes);
|
||||
if (finalParts.some(p => p.thought)) {
|
||||
const thinkingPart = finalParts.find(p => p.thought);
|
||||
logger.debug('[CloudCode] Thinking signature length:', thinkingPart?.thoughtSignature?.length || 0);
|
||||
}
|
||||
|
||||
return convertGoogleToAnthropic(accumulatedResponse, originalModel);
|
||||
}
|
||||
285
src/cloudcode/sse-streamer.js
Normal file
285
src/cloudcode/sse-streamer.js
Normal file
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* SSE Streamer for Cloud Code
|
||||
*
|
||||
* Streams SSE events in real-time, converting Google format to Anthropic format.
|
||||
* Handles thinking blocks, text blocks, and tool use blocks.
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import { MIN_SIGNATURE_LENGTH } from '../constants.js';
|
||||
import { cacheSignature } from '../format/signature-cache.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
* Stream SSE response and yield Anthropic-format events
|
||||
*
|
||||
* @param {Response} response - The HTTP response with SSE body
|
||||
* @param {string} originalModel - The original model name
|
||||
* @yields {Object} Anthropic-format SSE events
|
||||
*/
|
||||
export async function* streamSSEResponse(response, originalModel) {
|
||||
const messageId = `msg_${crypto.randomBytes(16).toString('hex')}`;
|
||||
let hasEmittedStart = false;
|
||||
let blockIndex = 0;
|
||||
let currentBlockType = null;
|
||||
let currentThinkingSignature = '';
|
||||
let inputTokens = 0;
|
||||
let outputTokens = 0;
|
||||
let cacheReadTokens = 0;
|
||||
let stopReason = 'end_turn';
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || '';
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
|
||||
const jsonText = line.slice(5).trim();
|
||||
if (!jsonText) continue;
|
||||
|
||||
try {
|
||||
const data = JSON.parse(jsonText);
|
||||
const innerResponse = data.response || data;
|
||||
|
||||
// Extract usage metadata (including cache tokens)
|
||||
const usage = innerResponse.usageMetadata;
|
||||
if (usage) {
|
||||
inputTokens = usage.promptTokenCount || inputTokens;
|
||||
outputTokens = usage.candidatesTokenCount || outputTokens;
|
||||
cacheReadTokens = usage.cachedContentTokenCount || cacheReadTokens;
|
||||
}
|
||||
|
||||
const candidates = innerResponse.candidates || [];
|
||||
const firstCandidate = candidates[0] || {};
|
||||
const content = firstCandidate.content || {};
|
||||
const parts = content.parts || [];
|
||||
|
||||
// Emit message_start on first data
|
||||
// Note: input_tokens = promptTokenCount - cachedContentTokenCount (Antigravity includes cached in total)
|
||||
if (!hasEmittedStart && parts.length > 0) {
|
||||
hasEmittedStart = true;
|
||||
yield {
|
||||
type: 'message_start',
|
||||
message: {
|
||||
id: messageId,
|
||||
type: 'message',
|
||||
role: 'assistant',
|
||||
content: [],
|
||||
model: originalModel,
|
||||
stop_reason: null,
|
||||
stop_sequence: null,
|
||||
usage: {
|
||||
input_tokens: inputTokens - cacheReadTokens,
|
||||
output_tokens: 0,
|
||||
cache_read_input_tokens: cacheReadTokens,
|
||||
cache_creation_input_tokens: 0
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Process each part
|
||||
for (const part of parts) {
|
||||
if (part.thought === true) {
|
||||
// Handle thinking block
|
||||
const text = part.text || '';
|
||||
const signature = part.thoughtSignature || '';
|
||||
|
||||
if (currentBlockType !== 'thinking') {
|
||||
if (currentBlockType !== null) {
|
||||
yield { type: 'content_block_stop', index: blockIndex };
|
||||
blockIndex++;
|
||||
}
|
||||
currentBlockType = 'thinking';
|
||||
currentThinkingSignature = '';
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
index: blockIndex,
|
||||
content_block: { type: 'thinking', thinking: '' }
|
||||
};
|
||||
}
|
||||
|
||||
if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
|
||||
currentThinkingSignature = signature;
|
||||
}
|
||||
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: { type: 'thinking_delta', thinking: text }
|
||||
};
|
||||
|
||||
} else if (part.text !== undefined) {
|
||||
// Skip empty text parts
|
||||
if (!part.text || part.text.trim().length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle regular text
|
||||
if (currentBlockType !== 'text') {
|
||||
if (currentBlockType === 'thinking' && currentThinkingSignature) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: { type: 'signature_delta', signature: currentThinkingSignature }
|
||||
};
|
||||
currentThinkingSignature = '';
|
||||
}
|
||||
if (currentBlockType !== null) {
|
||||
yield { type: 'content_block_stop', index: blockIndex };
|
||||
blockIndex++;
|
||||
}
|
||||
currentBlockType = 'text';
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
index: blockIndex,
|
||||
content_block: { type: 'text', text: '' }
|
||||
};
|
||||
}
|
||||
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: { type: 'text_delta', text: part.text }
|
||||
};
|
||||
|
||||
} else if (part.functionCall) {
|
||||
// Handle tool use
|
||||
// For Gemini 3+, capture thoughtSignature from the functionCall part
|
||||
// The signature is a sibling to functionCall, not inside it
|
||||
const functionCallSignature = part.thoughtSignature || '';
|
||||
|
||||
if (currentBlockType === 'thinking' && currentThinkingSignature) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: { type: 'signature_delta', signature: currentThinkingSignature }
|
||||
};
|
||||
currentThinkingSignature = '';
|
||||
}
|
||||
if (currentBlockType !== null) {
|
||||
yield { type: 'content_block_stop', index: blockIndex };
|
||||
blockIndex++;
|
||||
}
|
||||
currentBlockType = 'tool_use';
|
||||
stopReason = 'tool_use';
|
||||
|
||||
const toolId = part.functionCall.id || `toolu_${crypto.randomBytes(12).toString('hex')}`;
|
||||
|
||||
// For Gemini, include the thoughtSignature in the tool_use block
|
||||
// so it can be sent back in subsequent requests
|
||||
const toolUseBlock = {
|
||||
type: 'tool_use',
|
||||
id: toolId,
|
||||
name: part.functionCall.name,
|
||||
input: {}
|
||||
};
|
||||
|
||||
// Store the signature in the tool_use block for later retrieval
|
||||
if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) {
|
||||
toolUseBlock.thoughtSignature = functionCallSignature;
|
||||
// Cache for future requests (Claude Code may strip this field)
|
||||
cacheSignature(toolId, functionCallSignature);
|
||||
}
|
||||
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
index: blockIndex,
|
||||
content_block: toolUseBlock
|
||||
};
|
||||
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: {
|
||||
type: 'input_json_delta',
|
||||
partial_json: JSON.stringify(part.functionCall.args || {})
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Check finish reason
|
||||
if (firstCandidate.finishReason) {
|
||||
if (firstCandidate.finishReason === 'MAX_TOKENS') {
|
||||
stopReason = 'max_tokens';
|
||||
} else if (firstCandidate.finishReason === 'STOP') {
|
||||
stopReason = 'end_turn';
|
||||
}
|
||||
}
|
||||
|
||||
} catch (parseError) {
|
||||
logger.warn('[CloudCode] SSE parse error:', parseError.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle no content received
|
||||
if (!hasEmittedStart) {
|
||||
logger.warn('[CloudCode] No content parts received, emitting empty message');
|
||||
yield {
|
||||
type: 'message_start',
|
||||
message: {
|
||||
id: messageId,
|
||||
type: 'message',
|
||||
role: 'assistant',
|
||||
content: [],
|
||||
model: originalModel,
|
||||
stop_reason: null,
|
||||
stop_sequence: null,
|
||||
usage: {
|
||||
input_tokens: inputTokens - cacheReadTokens,
|
||||
output_tokens: 0,
|
||||
cache_read_input_tokens: cacheReadTokens,
|
||||
cache_creation_input_tokens: 0
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
yield {
|
||||
type: 'content_block_start',
|
||||
index: 0,
|
||||
content_block: { type: 'text', text: '' }
|
||||
};
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: 0,
|
||||
delta: { type: 'text_delta', text: '[No response received from API]' }
|
||||
};
|
||||
yield { type: 'content_block_stop', index: 0 };
|
||||
} else {
|
||||
// Close any open block
|
||||
if (currentBlockType !== null) {
|
||||
if (currentBlockType === 'thinking' && currentThinkingSignature) {
|
||||
yield {
|
||||
type: 'content_block_delta',
|
||||
index: blockIndex,
|
||||
delta: { type: 'signature_delta', signature: currentThinkingSignature }
|
||||
};
|
||||
}
|
||||
yield { type: 'content_block_stop', index: blockIndex };
|
||||
}
|
||||
}
|
||||
|
||||
// Emit message_delta and message_stop
|
||||
yield {
|
||||
type: 'message_delta',
|
||||
delta: { stop_reason: stopReason, stop_sequence: null },
|
||||
usage: {
|
||||
output_tokens: outputTokens,
|
||||
cache_read_input_tokens: cacheReadTokens,
|
||||
cache_creation_input_tokens: 0
|
||||
}
|
||||
};
|
||||
|
||||
yield { type: 'message_stop' };
|
||||
}
|
||||
199
src/cloudcode/streaming-handler.js
Normal file
199
src/cloudcode/streaming-handler.js
Normal file
@@ -0,0 +1,199 @@
|
||||
/**
|
||||
* Streaming Handler for Cloud Code
|
||||
*
|
||||
* Handles streaming message requests with multi-account support,
|
||||
* retry logic, and endpoint failover.
|
||||
*/
|
||||
|
||||
import {
|
||||
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
||||
MAX_RETRIES,
|
||||
MAX_WAIT_BEFORE_ERROR_MS
|
||||
} from '../constants.js';
|
||||
import { isRateLimitError, isAuthError } from '../errors.js';
|
||||
import { formatDuration, sleep } from '../utils/helpers.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { parseResetTime } from './rate-limit-parser.js';
|
||||
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
||||
import { streamSSEResponse } from './sse-streamer.js';
|
||||
|
||||
/**
|
||||
* Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED)
|
||||
* @deprecated Use isRateLimitError from errors.js instead
|
||||
*/
|
||||
function is429Error(error) {
|
||||
return isRateLimitError(error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an error is an auth-invalid error (credentials need re-authentication)
|
||||
* @deprecated Use isAuthError from errors.js instead
|
||||
*/
|
||||
function isAuthInvalidError(error) {
|
||||
return isAuthError(error);
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a streaming request to Cloud Code with multi-account support
|
||||
* Streams events in real-time as they arrive from the server
|
||||
*
|
||||
* @param {Object} anthropicRequest - The Anthropic-format request
|
||||
* @param {string} anthropicRequest.model - Model name to use
|
||||
* @param {Array} anthropicRequest.messages - Array of message objects
|
||||
* @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
|
||||
* @param {Object} [anthropicRequest.thinking] - Thinking configuration
|
||||
* @param {import('../account-manager/index.js').default} accountManager - The account manager instance
|
||||
* @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
|
||||
* @throws {Error} If max retries exceeded or no accounts available
|
||||
*/
|
||||
export async function* sendMessageStream(anthropicRequest, accountManager) {
|
||||
const model = anthropicRequest.model;
|
||||
|
||||
// Retry loop with account failover
|
||||
// Ensure we try at least as many times as there are accounts to cycle through everyone
|
||||
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
|
||||
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
// Use sticky account selection for cache continuity
|
||||
const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
|
||||
let account = stickyAccount;
|
||||
|
||||
// Handle waiting for sticky account
|
||||
if (!account && waitMs > 0) {
|
||||
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
|
||||
await sleep(waitMs);
|
||||
accountManager.clearExpiredLimits();
|
||||
account = accountManager.getCurrentStickyAccount();
|
||||
}
|
||||
|
||||
// Handle all accounts rate-limited
|
||||
if (!account) {
|
||||
if (accountManager.isAllRateLimited()) {
|
||||
const allWaitMs = accountManager.getMinWaitTimeMs();
|
||||
const resetTime = new Date(Date.now() + allWaitMs).toISOString();
|
||||
|
||||
// If wait time is too long (> 2 minutes), throw error immediately
|
||||
if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
||||
throw new Error(
|
||||
`RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for reset (applies to both single and multi-account modes)
|
||||
const accountCount = accountManager.getAccountCount();
|
||||
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
|
||||
await sleep(allWaitMs);
|
||||
accountManager.clearExpiredLimits();
|
||||
account = accountManager.pickNext();
|
||||
}
|
||||
|
||||
if (!account) {
|
||||
throw new Error('No accounts available');
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// Get token and project for this account
|
||||
const token = await accountManager.getTokenForAccount(account);
|
||||
const project = await accountManager.getProjectForAccount(account, token);
|
||||
const payload = buildCloudCodeRequest(anthropicRequest, project);
|
||||
|
||||
logger.debug(`[CloudCode] Starting stream for model: ${model}`);
|
||||
|
||||
// Try each endpoint for streaming
|
||||
let lastError = null;
|
||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
||||
try {
|
||||
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: buildHeaders(token, model, 'text/event-stream'),
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
|
||||
|
||||
if (response.status === 401) {
|
||||
// Auth error - clear caches and retry
|
||||
accountManager.clearTokenCache(account.email);
|
||||
accountManager.clearProjectCache(account.email);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (response.status === 429) {
|
||||
// Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
|
||||
logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
|
||||
const resetMs = parseResetTime(response, errorText);
|
||||
// Keep minimum reset time across all 429 responses
|
||||
if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
|
||||
lastError = { is429: true, response, errorText, resetMs };
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
||||
|
||||
// If it's a 5xx error, wait a bit before trying the next endpoint
|
||||
if (response.status >= 500) {
|
||||
logger.warn(`[CloudCode] ${response.status} stream error, waiting 1s before retry...`);
|
||||
await sleep(1000);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Stream the response - yield events as they arrive
|
||||
yield* streamSSEResponse(response, anthropicRequest.model);
|
||||
|
||||
logger.debug('[CloudCode] Stream completed');
|
||||
return;
|
||||
|
||||
} catch (endpointError) {
|
||||
if (is429Error(endpointError)) {
|
||||
throw endpointError; // Re-throw to trigger account switch
|
||||
}
|
||||
logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
|
||||
lastError = endpointError;
|
||||
}
|
||||
}
|
||||
|
||||
// If all endpoints failed for this account
|
||||
if (lastError) {
|
||||
// If all endpoints returned 429, mark account as rate-limited
|
||||
if (lastError.is429) {
|
||||
logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
|
||||
accountManager.markRateLimited(account.email, lastError.resetMs);
|
||||
throw new Error(`Rate limited: ${lastError.errorText}`);
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
if (is429Error(error)) {
|
||||
// Rate limited - already marked, continue to next account
|
||||
logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
|
||||
continue;
|
||||
}
|
||||
if (isAuthInvalidError(error)) {
|
||||
// Auth invalid - already marked, continue to next account
|
||||
logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
|
||||
continue;
|
||||
}
|
||||
// Non-rate-limit error: throw immediately
|
||||
// UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
|
||||
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
||||
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
|
||||
accountManager.pickNext(); // Force advance to next account
|
||||
continue;
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Max retries exceeded');
|
||||
}
|
||||
Reference in New Issue
Block a user