refactor: Reorganize src/ into modular folder structure

Split large monolithic files into focused modules: - cloudcode-client.js (1,107 lines) → src/cloudcode/ (9 files) - account-manager.js (639 lines) → src/account-manager/ (5 files) - Move auth files to src/auth/ (oauth, token-extractor, database) - Move CLI to src/cli/accounts.js Update all import paths and documentation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-01 15:13:43 +05:30
parent 1d91bc0d30
commit f02364d4ef
23 changed files with 2235 additions and 1784 deletions
--- a/src/cloudcode/index.js
+++ b/src/cloudcode/index.js
@@ -0,0 +1,28 @@
+/**
+ * Cloud Code Client for Antigravity
+ *
+ * Communicates with Google's Cloud Code internal API using the
+ * v1internal:streamGenerateContent endpoint with proper request wrapping.
+ *
+ * Supports multi-account load balancing with automatic failover.
+ *
+ * Based on: https://github.com/NoeFabris/opencode-antigravity-auth
+ */
+
+// Re-export public API
+export { sendMessage } from './message-handler.js';
+export { sendMessageStream } from './streaming-handler.js';
+export { listModels, fetchAvailableModels, getModelQuotas } from './model-api.js';
+
+// Default export for backwards compatibility
+import { sendMessage } from './message-handler.js';
+import { sendMessageStream } from './streaming-handler.js';
+import { listModels, fetchAvailableModels, getModelQuotas } from './model-api.js';
+
+export default {
+    sendMessage,
+    sendMessageStream,
+    listModels,
+    fetchAvailableModels,
+    getModelQuotas
+};
--- a/src/cloudcode/message-handler.js
+++ b/src/cloudcode/message-handler.js
@@ -0,0 +1,209 @@
+/**
+ * Message Handler for Cloud Code
+ *
+ * Handles non-streaming message requests with multi-account support,
+ * retry logic, and endpoint failover.
+ */
+
+import {
+    ANTIGRAVITY_ENDPOINT_FALLBACKS,
+    MAX_RETRIES,
+    MAX_WAIT_BEFORE_ERROR_MS,
+    isThinkingModel
+} from '../constants.js';
+import { convertGoogleToAnthropic } from '../format/index.js';
+import { isRateLimitError, isAuthError } from '../errors.js';
+import { formatDuration, sleep } from '../utils/helpers.js';
+import { logger } from '../utils/logger.js';
+import { parseResetTime } from './rate-limit-parser.js';
+import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
+import { parseThinkingSSEResponse } from './sse-parser.js';
+
+/**
+ * Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED)
+ * @deprecated Use isRateLimitError from errors.js instead
+ */
+function is429Error(error) {
+    return isRateLimitError(error);
+}
+
+/**
+ * Check if an error is an auth-invalid error (credentials need re-authentication)
+ * @deprecated Use isAuthError from errors.js instead
+ */
+function isAuthInvalidError(error) {
+    return isAuthError(error);
+}
+
+/**
+ * Send a non-streaming request to Cloud Code with multi-account support
+ * Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @param {Object} anthropicRequest.model - Model name to use
+ * @param {Array} anthropicRequest.messages - Array of message objects
+ * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
+ * @param {Object} [anthropicRequest.thinking] - Thinking configuration
+ * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
+ * @returns {Promise<Object>} Anthropic-format response object
+ * @throws {Error} If max retries exceeded or no accounts available
+ */
+export async function sendMessage(anthropicRequest, accountManager) {
+    const model = anthropicRequest.model;
+    const isThinking = isThinkingModel(model);
+
+    // Retry loop with account failover
+    // Ensure we try at least as many times as there are accounts to cycle through everyone
+    // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
+    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
+
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        // Use sticky account selection for cache continuity
+        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
+        let account = stickyAccount;
+
+        // Handle waiting for sticky account
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
+            await sleep(waitMs);
+            accountManager.clearExpiredLimits();
+            account = accountManager.getCurrentStickyAccount();
+        }
+
+        // Handle all accounts rate-limited
+        if (!account) {
+            if (accountManager.isAllRateLimited()) {
+                const allWaitMs = accountManager.getMinWaitTimeMs();
+                const resetTime = new Date(Date.now() + allWaitMs).toISOString();
+
+                // If wait time is too long (> 2 minutes), throw error immediately
+                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                    throw new Error(
+                        `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
+                    );
+                }
+
+                // Wait for reset (applies to both single and multi-account modes)
+                const accountCount = accountManager.getAccountCount();
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
+                await sleep(allWaitMs);
+                accountManager.clearExpiredLimits();
+                account = accountManager.pickNext();
+            }
+
+            if (!account) {
+                throw new Error('No accounts available');
+            }
+        }
+
+        try {
+            // Get token and project for this account
+            const token = await accountManager.getTokenForAccount(account);
+            const project = await accountManager.getProjectForAccount(account, token);
+            const payload = buildCloudCodeRequest(anthropicRequest, project);
+
+            logger.debug(`[CloudCode] Sending request for model: ${model}`);
+
+            // Try each endpoint
+            let lastError = null;
+            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
+                try {
+                    const url = isThinking
+                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
+                        : `${endpoint}/v1internal:generateContent`;
+
+                    const response = await fetch(url, {
+                        method: 'POST',
+                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
+                        body: JSON.stringify(payload)
+                    });
+
+                    if (!response.ok) {
+                        const errorText = await response.text();
+                        logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
+
+                        if (response.status === 401) {
+                            // Auth error - clear caches and retry with fresh token
+                            logger.warn('[CloudCode] Auth error, refreshing token...');
+                            accountManager.clearTokenCache(account.email);
+                            accountManager.clearProjectCache(account.email);
+                            continue;
+                        }
+
+                        if (response.status === 429) {
+                            // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
+                            logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
+                            const resetMs = parseResetTime(response, errorText);
+                            // Keep minimum reset time across all 429 responses
+                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
+                                lastError = { is429: true, response, errorText, resetMs };
+                            }
+                            continue;
+                        }
+
+                        if (response.status >= 400) {
+                            lastError = new Error(`API error ${response.status}: ${errorText}`);
+                            // If it's a 5xx error, wait a bit before trying the next endpoint
+                            if (response.status >= 500) {
+                                logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
+                                await sleep(1000);
+                            }
+                            continue;
+                        }
+                    }
+
+                    // For thinking models, parse SSE and accumulate all parts
+                    if (isThinking) {
+                        return await parseThinkingSSEResponse(response, anthropicRequest.model);
+                    }
+
+                    // Non-thinking models use regular JSON
+                    const data = await response.json();
+                    logger.debug('[CloudCode] Response received');
+                    return convertGoogleToAnthropic(data, anthropicRequest.model);
+
+                } catch (endpointError) {
+                    if (is429Error(endpointError)) {
+                        throw endpointError; // Re-throw to trigger account switch
+                    }
+                    logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
+                    lastError = endpointError;
+                }
+            }
+
+            // If all endpoints failed for this account
+            if (lastError) {
+                // If all endpoints returned 429, mark account as rate-limited
+                if (lastError.is429) {
+                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
+                    accountManager.markRateLimited(account.email, lastError.resetMs);
+                    throw new Error(`Rate limited: ${lastError.errorText}`);
+                }
+                throw lastError;
+            }
+
+        } catch (error) {
+            if (is429Error(error)) {
+                // Rate limited - already marked, continue to next account
+                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
+                continue;
+            }
+            if (isAuthInvalidError(error)) {
+                // Auth invalid - already marked, continue to next account
+                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
+                continue;
+            }
+            // Non-rate-limit error: throw immediately
+            // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
+            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
+                logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
+                accountManager.pickNext(); // Force advance to next account
+                continue;
+            }
+
+            throw error;
+        }
+    }
+
+    throw new Error('Max retries exceeded');
+}
--- a/src/cloudcode/model-api.js
+++ b/src/cloudcode/model-api.js
@@ -0,0 +1,97 @@
+/**
+ * Model API for Cloud Code
+ *
+ * Handles model listing and quota retrieval from the Cloud Code API.
+ */
+
+import { ANTIGRAVITY_ENDPOINT_FALLBACKS, ANTIGRAVITY_HEADERS } from '../constants.js';
+import { logger } from '../utils/logger.js';
+
+/**
+ * List available models in Anthropic API format
+ * Fetches models dynamically from the Cloud Code API
+ *
+ * @param {string} token - OAuth access token
+ * @returns {Promise<{object: string, data: Array<{id: string, object: string, created: number, owned_by: string, description: string}>}>} List of available models
+ */
+export async function listModels(token) {
+    const data = await fetchAvailableModels(token);
+    if (!data || !data.models) {
+        return { object: 'list', data: [] };
+    }
+
+    const modelList = Object.entries(data.models).map(([modelId, modelData]) => ({
+        id: modelId,
+        object: 'model',
+        created: Math.floor(Date.now() / 1000),
+        owned_by: 'anthropic',
+        description: modelData.displayName || modelId
+    }));
+
+    return {
+        object: 'list',
+        data: modelList
+    };
+}
+
+/**
+ * Fetch available models with quota info from Cloud Code API
+ * Returns model quotas including remaining fraction and reset time
+ *
+ * @param {string} token - OAuth access token
+ * @returns {Promise<Object>} Raw response from fetchAvailableModels API
+ */
+export async function fetchAvailableModels(token) {
+    const headers = {
+        'Authorization': `Bearer ${token}`,
+        'Content-Type': 'application/json',
+        ...ANTIGRAVITY_HEADERS
+    };
+
+    for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
+        try {
+            const url = `${endpoint}/v1internal:fetchAvailableModels`;
+            const response = await fetch(url, {
+                method: 'POST',
+                headers,
+                body: JSON.stringify({})
+            });
+
+            if (!response.ok) {
+                const errorText = await response.text();
+                logger.warn(`[CloudCode] fetchAvailableModels error at ${endpoint}: ${response.status}`);
+                continue;
+            }
+
+            return await response.json();
+        } catch (error) {
+            logger.warn(`[CloudCode] fetchAvailableModels failed at ${endpoint}:`, error.message);
+        }
+    }
+
+    throw new Error('Failed to fetch available models from all endpoints');
+}
+
+/**
+ * Get model quotas for an account
+ * Extracts quota info (remaining fraction and reset time) for each model
+ *
+ * @param {string} token - OAuth access token
+ * @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime }
+ */
+export async function getModelQuotas(token) {
+    const data = await fetchAvailableModels(token);
+    if (!data || !data.models) return {};
+
+    const quotas = {};
+    for (const [modelId, modelData] of Object.entries(data.models)) {
+        if (modelData.quotaInfo) {
+            quotas[modelId] = {
+                remainingFraction: modelData.quotaInfo.remainingFraction ?? null,
+                resetTime: modelData.quotaInfo.resetTime ?? null
+            };
+        }
+    }
+
+    return quotas;
+}
--- a/src/cloudcode/rate-limit-parser.js
+++ b/src/cloudcode/rate-limit-parser.js
@@ -0,0 +1,181 @@
+/**
+ * Rate Limit Parser for Cloud Code
+ *
+ * Parses reset times from HTTP headers and error messages.
+ * Supports various formats: Retry-After, x-ratelimit-reset,
+ * quotaResetDelay, quotaResetTimeStamp, and duration strings.
+ */
+
+import { formatDuration } from '../utils/helpers.js';
+import { logger } from '../utils/logger.js';
+
+/**
+ * Parse reset time from HTTP response or error
+ * Checks headers first, then error message body
+ * Returns milliseconds or null if not found
+ *
+ * @param {Response|Error} responseOrError - HTTP Response object or Error
+ * @param {string} errorText - Optional error body text
+ */
+export function parseResetTime(responseOrError, errorText = '') {
+    let resetMs = null;
+
+    // If it's a Response object, check headers first
+    if (responseOrError && typeof responseOrError.headers?.get === 'function') {
+        const headers = responseOrError.headers;
+
+        // Standard Retry-After header (seconds or HTTP date)
+        const retryAfter = headers.get('retry-after');
+        if (retryAfter) {
+            const seconds = parseInt(retryAfter, 10);
+            if (!isNaN(seconds)) {
+                resetMs = seconds * 1000;
+                logger.debug(`[CloudCode] Retry-After header: ${seconds}s`);
+            } else {
+                // Try parsing as HTTP date
+                const date = new Date(retryAfter);
+                if (!isNaN(date.getTime())) {
+                    resetMs = date.getTime() - Date.now();
+                    if (resetMs > 0) {
+                        logger.debug(`[CloudCode] Retry-After date: ${retryAfter}`);
+                    } else {
+                        resetMs = null;
+                    }
+                }
+            }
+        }
+
+        // x-ratelimit-reset (Unix timestamp in seconds)
+        if (!resetMs) {
+            const ratelimitReset = headers.get('x-ratelimit-reset');
+            if (ratelimitReset) {
+                const resetTimestamp = parseInt(ratelimitReset, 10) * 1000;
+                resetMs = resetTimestamp - Date.now();
+                if (resetMs > 0) {
+                    logger.debug(`[CloudCode] x-ratelimit-reset: ${new Date(resetTimestamp).toISOString()}`);
+                } else {
+                    resetMs = null;
+                }
+            }
+        }
+
+        // x-ratelimit-reset-after (seconds)
+        if (!resetMs) {
+            const resetAfter = headers.get('x-ratelimit-reset-after');
+            if (resetAfter) {
+                const seconds = parseInt(resetAfter, 10);
+                if (!isNaN(seconds) && seconds > 0) {
+                    resetMs = seconds * 1000;
+                    logger.debug(`[CloudCode] x-ratelimit-reset-after: ${seconds}s`);
+                }
+            }
+        }
+    }
+
+    // If no header found, try parsing from error message/body
+    if (!resetMs) {
+        const msg = (responseOrError instanceof Error ? responseOrError.message : errorText) || '';
+
+        // Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s")
+        // This is Google's preferred format for rate limit reset delay
+        const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\\d+(?:\\.\\d+)?)(ms|s)/i);
+        if (quotaDelayMatch) {
+            const value = parseFloat(quotaDelayMatch[1]);
+            const unit = quotaDelayMatch[2].toLowerCase();
+            resetMs = unit === 's' ? Math.ceil(value * 1000) : Math.ceil(value);
+            logger.debug(`[CloudCode] Parsed quotaResetDelay from body: ${resetMs}ms`);
+        }
+
+        // Try to extract "quotaResetTimeStamp" (ISO format like "2025-12-31T07:00:47Z")
+        if (!resetMs) {
+            const quotaTimestampMatch = msg.match(/quotaResetTimeStamp[:\s"]+(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)/i);
+            if (quotaTimestampMatch) {
+                const resetTime = new Date(quotaTimestampMatch[1]).getTime();
+                if (!isNaN(resetTime)) {
+                    resetMs = resetTime - Date.now();
+                    // Even if expired or 0, we found a timestamp, so rely on it.
+                    // But if it's negative, it means "now", so treat as small wait.
+                    logger.debug(`[CloudCode] Parsed quotaResetTimeStamp: ${quotaTimestampMatch[1]} (Delta: ${resetMs}ms)`);
+                }
+            }
+        }
+
+        // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
+        // Added stricter regex to avoid partial matches
+        if (!resetMs) {
+             const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\\d\\.]+)(?:s\b|s")/i);
+             if (secMatch) {
+                 resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
+                 logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
+             }
+        }
+
+        if (!resetMs) {
+            // Check for ms (explicit "ms" suffix or implicit if no suffix)
+            const msMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+(\d+)(?:\s*ms)?(?![\w.])/i);
+            if (msMatch) {
+                resetMs = parseInt(msMatch[1], 10);
+                logger.debug(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`);
+            }
+        }
+
+        // Try to extract seconds value like "retry after 60 seconds"
+        if (!resetMs) {
+            const secMatch = msg.match(/retry\s+(?:after\s+)?(\d+)\s*(?:sec|s\b)/i);
+            if (secMatch) {
+                resetMs = parseInt(secMatch[1], 10) * 1000;
+                logger.debug(`[CloudCode] Parsed retry seconds from body: ${secMatch[1]}s`);
+            }
+        }
+
+        // Try to extract duration like "1h23m45s" or "23m45s" or "45s"
+        if (!resetMs) {
+            const durationMatch = msg.match(/(\d+)h(\d+)m(\d+)s|(\d+)m(\d+)s|(\d+)s/i);
+            if (durationMatch) {
+                if (durationMatch[1]) {
+                    const hours = parseInt(durationMatch[1], 10);
+                    const minutes = parseInt(durationMatch[2], 10);
+                    const seconds = parseInt(durationMatch[3], 10);
+                    resetMs = (hours * 3600 + minutes * 60 + seconds) * 1000;
+                } else if (durationMatch[4]) {
+                    const minutes = parseInt(durationMatch[4], 10);
+                    const seconds = parseInt(durationMatch[5], 10);
+                    resetMs = (minutes * 60 + seconds) * 1000;
+                } else if (durationMatch[6]) {
+                    resetMs = parseInt(durationMatch[6], 10) * 1000;
+                }
+                if (resetMs) {
+                    logger.debug(`[CloudCode] Parsed duration from body: ${formatDuration(resetMs)}`);
+                }
+            }
+        }
+
+        // Try to extract ISO timestamp or Unix timestamp
+        if (!resetMs) {
+            const isoMatch = msg.match(/reset[:\s"]+(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)/i);
+            if (isoMatch) {
+                const resetTime = new Date(isoMatch[1]).getTime();
+                if (!isNaN(resetTime)) {
+                    resetMs = resetTime - Date.now();
+                    if (resetMs > 0) {
+                        logger.debug(`[CloudCode] Parsed ISO reset time: ${isoMatch[1]}`);
+                    } else {
+                        resetMs = null;
+                    }
+                }
+            }
+        }
+    }
+
+    // SANITY CHECK: Enforce strict minimums for found rate limits
+    // If we found a reset time, but it's very small (e.g. < 1s) or negative,
+    // explicitly bump it up to avoid "Available in 0s" loops.
+    if (resetMs !== null) {
+        if (resetMs < 1000) {
+            logger.debug(`[CloudCode] Reset time too small (${resetMs}ms), enforcing 2s buffer`);
+            resetMs = 2000;
+        }
+    }
+
+    return resetMs;
+}
--- a/src/cloudcode/request-builder.js
+++ b/src/cloudcode/request-builder.js
@@ -0,0 +1,68 @@
+/**
+ * Request Builder for Cloud Code
+ *
+ * Builds request payloads and headers for the Cloud Code API.
+ */
+
+import crypto from 'crypto';
+import {
+    ANTIGRAVITY_HEADERS,
+    getModelFamily,
+    isThinkingModel
+} from '../constants.js';
+import { convertAnthropicToGoogle } from '../format/index.js';
+import { deriveSessionId } from './session-manager.js';
+
+/**
+ * Build the wrapped request body for Cloud Code API
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @param {string} projectId - The project ID to use
+ * @returns {Object} The Cloud Code API request payload
+ */
+export function buildCloudCodeRequest(anthropicRequest, projectId) {
+    const model = anthropicRequest.model;
+    const googleRequest = convertAnthropicToGoogle(anthropicRequest);
+
+    // Use stable session ID derived from first user message for cache continuity
+    googleRequest.sessionId = deriveSessionId(anthropicRequest);
+
+    const payload = {
+        project: projectId,
+        model: model,
+        request: googleRequest,
+        userAgent: 'antigravity',
+        requestId: 'agent-' + crypto.randomUUID()
+    };
+
+    return payload;
+}
+
+/**
+ * Build headers for Cloud Code API requests
+ *
+ * @param {string} token - OAuth access token
+ * @param {string} model - Model name
+ * @param {string} accept - Accept header value (default: 'application/json')
+ * @returns {Object} Headers object
+ */
+export function buildHeaders(token, model, accept = 'application/json') {
+    const headers = {
+        'Authorization': `Bearer ${token}`,
+        'Content-Type': 'application/json',
+        ...ANTIGRAVITY_HEADERS
+    };
+
+    const modelFamily = getModelFamily(model);
+
+    // Add interleaved thinking header only for Claude thinking models
+    if (modelFamily === 'claude' && isThinkingModel(model)) {
+        headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14';
+    }
+
+    if (accept !== 'application/json') {
+        headers['Accept'] = accept;
+    }
+
+    return headers;
+}
--- a/src/cloudcode/session-manager.js
+++ b/src/cloudcode/session-manager.js
@@ -0,0 +1,47 @@
+/**
+ * Session Management for Cloud Code
+ *
+ * Handles session ID derivation for prompt caching continuity.
+ * Session IDs are derived from the first user message to ensure
+ * the same conversation uses the same session across turns.
+ */
+
+import crypto from 'crypto';
+
+/**
+ * Derive a stable session ID from the first user message in the conversation.
+ * This ensures the same conversation uses the same session ID across turns,
+ * enabling prompt caching (cache is scoped to session + organization).
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @returns {string} A stable session ID (32 hex characters) or random UUID if no user message
+ */
+export function deriveSessionId(anthropicRequest) {
+    const messages = anthropicRequest.messages || [];
+
+    // Find the first user message
+    for (const msg of messages) {
+        if (msg.role === 'user') {
+            let content = '';
+
+            if (typeof msg.content === 'string') {
+                content = msg.content;
+            } else if (Array.isArray(msg.content)) {
+                // Extract text from content blocks
+                content = msg.content
+                    .filter(block => block.type === 'text' && block.text)
+                    .map(block => block.text)
+                    .join('\n');
+            }
+
+            if (content) {
+                // Hash the content with SHA256, return first 32 hex chars
+                const hash = crypto.createHash('sha256').update(content).digest('hex');
+                return hash.substring(0, 32);
+            }
+        }
+    }
+
+    // Fallback to random UUID if no user message found
+    return crypto.randomUUID();
+}
--- a/src/cloudcode/sse-parser.js
+++ b/src/cloudcode/sse-parser.js
@@ -0,0 +1,116 @@
+/**
+ * SSE Parser for Cloud Code
+ *
+ * Parses SSE responses for non-streaming thinking models.
+ * Accumulates all parts and returns a single response.
+ */
+
+import { convertGoogleToAnthropic } from '../format/index.js';
+import { logger } from '../utils/logger.js';
+
+/**
+ * Parse SSE response for thinking models and accumulate all parts
+ *
+ * @param {Response} response - The HTTP response with SSE body
+ * @param {string} originalModel - The original model name
+ * @returns {Promise<Object>} Anthropic-format response object
+ */
+export async function parseThinkingSSEResponse(response, originalModel) {
+    let accumulatedThinkingText = '';
+    let accumulatedThinkingSignature = '';
+    let accumulatedText = '';
+    const finalParts = [];
+    let usageMetadata = {};
+    let finishReason = 'STOP';
+
+    const flushThinking = () => {
+        if (accumulatedThinkingText) {
+            finalParts.push({
+                thought: true,
+                text: accumulatedThinkingText,
+                thoughtSignature: accumulatedThinkingSignature
+            });
+            accumulatedThinkingText = '';
+            accumulatedThinkingSignature = '';
+        }
+    };
+
+    const flushText = () => {
+        if (accumulatedText) {
+            finalParts.push({ text: accumulatedText });
+            accumulatedText = '';
+        }
+    };
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+
+    while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+            if (!line.startsWith('data:')) continue;
+            const jsonText = line.slice(5).trim();
+            if (!jsonText) continue;
+
+            try {
+                const data = JSON.parse(jsonText);
+                const innerResponse = data.response || data;
+
+                if (innerResponse.usageMetadata) {
+                    usageMetadata = innerResponse.usageMetadata;
+                }
+
+                const candidates = innerResponse.candidates || [];
+                const firstCandidate = candidates[0] || {};
+                if (firstCandidate.finishReason) {
+                    finishReason = firstCandidate.finishReason;
+                }
+
+                const parts = firstCandidate.content?.parts || [];
+                for (const part of parts) {
+                    if (part.thought === true) {
+                        flushText();
+                        accumulatedThinkingText += (part.text || '');
+                        if (part.thoughtSignature) {
+                            accumulatedThinkingSignature = part.thoughtSignature;
+                        }
+                    } else if (part.functionCall) {
+                        flushThinking();
+                        flushText();
+                        finalParts.push(part);
+                    } else if (part.text !== undefined) {
+                        if (!part.text) continue;
+                        flushThinking();
+                        accumulatedText += part.text;
+                    }
+                }
+            } catch (e) {
+                logger.debug('[CloudCode] SSE parse warning:', e.message, 'Raw:', jsonText.slice(0, 100));
+            }
+        }
+    }
+
+    flushThinking();
+    flushText();
+
+    const accumulatedResponse = {
+        candidates: [{ content: { parts: finalParts }, finishReason }],
+        usageMetadata
+    };
+
+    const partTypes = finalParts.map(p => p.thought ? 'thought' : (p.functionCall ? 'functionCall' : 'text'));
+    logger.debug('[CloudCode] Response received (SSE), part types:', partTypes);
+    if (finalParts.some(p => p.thought)) {
+        const thinkingPart = finalParts.find(p => p.thought);
+        logger.debug('[CloudCode] Thinking signature length:', thinkingPart?.thoughtSignature?.length || 0);
+    }
+
+    return convertGoogleToAnthropic(accumulatedResponse, originalModel);
+}
--- a/src/cloudcode/sse-streamer.js
+++ b/src/cloudcode/sse-streamer.js
@@ -0,0 +1,285 @@
+/**
+ * SSE Streamer for Cloud Code
+ *
+ * Streams SSE events in real-time, converting Google format to Anthropic format.
+ * Handles thinking blocks, text blocks, and tool use blocks.
+ */
+
+import crypto from 'crypto';
+import { MIN_SIGNATURE_LENGTH } from '../constants.js';
+import { cacheSignature } from '../format/signature-cache.js';
+import { logger } from '../utils/logger.js';
+
+/**
+ * Stream SSE response and yield Anthropic-format events
+ *
+ * @param {Response} response - The HTTP response with SSE body
+ * @param {string} originalModel - The original model name
+ * @yields {Object} Anthropic-format SSE events
+ */
+export async function* streamSSEResponse(response, originalModel) {
+    const messageId = `msg_${crypto.randomBytes(16).toString('hex')}`;
+    let hasEmittedStart = false;
+    let blockIndex = 0;
+    let currentBlockType = null;
+    let currentThinkingSignature = '';
+    let inputTokens = 0;
+    let outputTokens = 0;
+    let cacheReadTokens = 0;
+    let stopReason = 'end_turn';
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+
+    while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+            if (!line.startsWith('data:')) continue;
+
+            const jsonText = line.slice(5).trim();
+            if (!jsonText) continue;
+
+            try {
+                const data = JSON.parse(jsonText);
+                const innerResponse = data.response || data;
+
+                // Extract usage metadata (including cache tokens)
+                const usage = innerResponse.usageMetadata;
+                if (usage) {
+                    inputTokens = usage.promptTokenCount || inputTokens;
+                    outputTokens = usage.candidatesTokenCount || outputTokens;
+                    cacheReadTokens = usage.cachedContentTokenCount || cacheReadTokens;
+                }
+
+                const candidates = innerResponse.candidates || [];
+                const firstCandidate = candidates[0] || {};
+                const content = firstCandidate.content || {};
+                const parts = content.parts || [];
+
+                // Emit message_start on first data
+                // Note: input_tokens = promptTokenCount - cachedContentTokenCount (Antigravity includes cached in total)
+                if (!hasEmittedStart && parts.length > 0) {
+                    hasEmittedStart = true;
+                    yield {
+                        type: 'message_start',
+                        message: {
+                            id: messageId,
+                            type: 'message',
+                            role: 'assistant',
+                            content: [],
+                            model: originalModel,
+                            stop_reason: null,
+                            stop_sequence: null,
+                            usage: {
+                                input_tokens: inputTokens - cacheReadTokens,
+                                output_tokens: 0,
+                                cache_read_input_tokens: cacheReadTokens,
+                                cache_creation_input_tokens: 0
+                            }
+                        }
+                    };
+                }
+
+                // Process each part
+                for (const part of parts) {
+                    if (part.thought === true) {
+                        // Handle thinking block
+                        const text = part.text || '';
+                        const signature = part.thoughtSignature || '';
+
+                        if (currentBlockType !== 'thinking') {
+                            if (currentBlockType !== null) {
+                                yield { type: 'content_block_stop', index: blockIndex };
+                                blockIndex++;
+                            }
+                            currentBlockType = 'thinking';
+                            currentThinkingSignature = '';
+                            yield {
+                                type: 'content_block_start',
+                                index: blockIndex,
+                                content_block: { type: 'thinking', thinking: '' }
+                            };
+                        }
+
+                        if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
+                            currentThinkingSignature = signature;
+                        }
+
+                        yield {
+                            type: 'content_block_delta',
+                            index: blockIndex,
+                            delta: { type: 'thinking_delta', thinking: text }
+                        };
+
+                    } else if (part.text !== undefined) {
+                        // Skip empty text parts
+                        if (!part.text || part.text.trim().length === 0) {
+                            continue;
+                        }
+
+                        // Handle regular text
+                        if (currentBlockType !== 'text') {
+                            if (currentBlockType === 'thinking' && currentThinkingSignature) {
+                                yield {
+                                    type: 'content_block_delta',
+                                    index: blockIndex,
+                                    delta: { type: 'signature_delta', signature: currentThinkingSignature }
+                                };
+                                currentThinkingSignature = '';
+                            }
+                            if (currentBlockType !== null) {
+                                yield { type: 'content_block_stop', index: blockIndex };
+                                blockIndex++;
+                            }
+                            currentBlockType = 'text';
+                            yield {
+                                type: 'content_block_start',
+                                index: blockIndex,
+                                content_block: { type: 'text', text: '' }
+                            };
+                        }
+
+                        yield {
+                            type: 'content_block_delta',
+                            index: blockIndex,
+                            delta: { type: 'text_delta', text: part.text }
+                        };
+
+                    } else if (part.functionCall) {
+                        // Handle tool use
+                        // For Gemini 3+, capture thoughtSignature from the functionCall part
+                        // The signature is a sibling to functionCall, not inside it
+                        const functionCallSignature = part.thoughtSignature || '';
+
+                        if (currentBlockType === 'thinking' && currentThinkingSignature) {
+                            yield {
+                                type: 'content_block_delta',
+                                index: blockIndex,
+                                delta: { type: 'signature_delta', signature: currentThinkingSignature }
+                            };
+                            currentThinkingSignature = '';
+                        }
+                        if (currentBlockType !== null) {
+                            yield { type: 'content_block_stop', index: blockIndex };
+                            blockIndex++;
+                        }
+                        currentBlockType = 'tool_use';
+                        stopReason = 'tool_use';
+
+                        const toolId = part.functionCall.id || `toolu_${crypto.randomBytes(12).toString('hex')}`;
+
+                        // For Gemini, include the thoughtSignature in the tool_use block
+                        // so it can be sent back in subsequent requests
+                        const toolUseBlock = {
+                            type: 'tool_use',
+                            id: toolId,
+                            name: part.functionCall.name,
+                            input: {}
+                        };
+
+                        // Store the signature in the tool_use block for later retrieval
+                        if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) {
+                            toolUseBlock.thoughtSignature = functionCallSignature;
+                            // Cache for future requests (Claude Code may strip this field)
+                            cacheSignature(toolId, functionCallSignature);
+                        }
+
+                        yield {
+                            type: 'content_block_start',
+                            index: blockIndex,
+                            content_block: toolUseBlock
+                        };
+
+                        yield {
+                            type: 'content_block_delta',
+                            index: blockIndex,
+                            delta: {
+                                type: 'input_json_delta',
+                                partial_json: JSON.stringify(part.functionCall.args || {})
+                            }
+                        };
+                    }
+                }
+
+                // Check finish reason
+                if (firstCandidate.finishReason) {
+                    if (firstCandidate.finishReason === 'MAX_TOKENS') {
+                        stopReason = 'max_tokens';
+                    } else if (firstCandidate.finishReason === 'STOP') {
+                        stopReason = 'end_turn';
+                    }
+                }
+
+            } catch (parseError) {
+                logger.warn('[CloudCode] SSE parse error:', parseError.message);
+            }
+        }
+    }
+
+    // Handle no content received
+    if (!hasEmittedStart) {
+        logger.warn('[CloudCode] No content parts received, emitting empty message');
+        yield {
+            type: 'message_start',
+            message: {
+                id: messageId,
+                type: 'message',
+                role: 'assistant',
+                content: [],
+                model: originalModel,
+                stop_reason: null,
+                stop_sequence: null,
+                usage: {
+                    input_tokens: inputTokens - cacheReadTokens,
+                    output_tokens: 0,
+                    cache_read_input_tokens: cacheReadTokens,
+                    cache_creation_input_tokens: 0
+                }
+            }
+        };
+
+        yield {
+            type: 'content_block_start',
+            index: 0,
+            content_block: { type: 'text', text: '' }
+        };
+        yield {
+            type: 'content_block_delta',
+            index: 0,
+            delta: { type: 'text_delta', text: '[No response received from API]' }
+        };
+        yield { type: 'content_block_stop', index: 0 };
+    } else {
+        // Close any open block
+        if (currentBlockType !== null) {
+            if (currentBlockType === 'thinking' && currentThinkingSignature) {
+                yield {
+                    type: 'content_block_delta',
+                    index: blockIndex,
+                    delta: { type: 'signature_delta', signature: currentThinkingSignature }
+                };
+            }
+            yield { type: 'content_block_stop', index: blockIndex };
+        }
+    }
+
+    // Emit message_delta and message_stop
+    yield {
+        type: 'message_delta',
+        delta: { stop_reason: stopReason, stop_sequence: null },
+        usage: {
+            output_tokens: outputTokens,
+            cache_read_input_tokens: cacheReadTokens,
+            cache_creation_input_tokens: 0
+        }
+    };
+
+    yield { type: 'message_stop' };
+}
--- a/src/cloudcode/streaming-handler.js
+++ b/src/cloudcode/streaming-handler.js
@@ -0,0 +1,199 @@
+/**
+ * Streaming Handler for Cloud Code
+ *
+ * Handles streaming message requests with multi-account support,
+ * retry logic, and endpoint failover.
+ */
+
+import {
+    ANTIGRAVITY_ENDPOINT_FALLBACKS,
+    MAX_RETRIES,
+    MAX_WAIT_BEFORE_ERROR_MS
+} from '../constants.js';
+import { isRateLimitError, isAuthError } from '../errors.js';
+import { formatDuration, sleep } from '../utils/helpers.js';
+import { logger } from '../utils/logger.js';
+import { parseResetTime } from './rate-limit-parser.js';
+import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
+import { streamSSEResponse } from './sse-streamer.js';
+
+/**
+ * Check if an error is a rate limit error (429 or RESOURCE_EXHAUSTED)
+ * @deprecated Use isRateLimitError from errors.js instead
+ */
+function is429Error(error) {
+    return isRateLimitError(error);
+}
+
+/**
+ * Check if an error is an auth-invalid error (credentials need re-authentication)
+ * @deprecated Use isAuthError from errors.js instead
+ */
+function isAuthInvalidError(error) {
+    return isAuthError(error);
+}
+
+/**
+ * Send a streaming request to Cloud Code with multi-account support
+ * Streams events in real-time as they arrive from the server
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @param {string} anthropicRequest.model - Model name to use
+ * @param {Array} anthropicRequest.messages - Array of message objects
+ * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
+ * @param {Object} [anthropicRequest.thinking] - Thinking configuration
+ * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
+ * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
+ * @throws {Error} If max retries exceeded or no accounts available
+ */
+export async function* sendMessageStream(anthropicRequest, accountManager) {
+    const model = anthropicRequest.model;
+
+    // Retry loop with account failover
+    // Ensure we try at least as many times as there are accounts to cycle through everyone
+    // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
+    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
+
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        // Use sticky account selection for cache continuity
+        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
+        let account = stickyAccount;
+
+        // Handle waiting for sticky account
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
+            await sleep(waitMs);
+            accountManager.clearExpiredLimits();
+            account = accountManager.getCurrentStickyAccount();
+        }
+
+        // Handle all accounts rate-limited
+        if (!account) {
+            if (accountManager.isAllRateLimited()) {
+                const allWaitMs = accountManager.getMinWaitTimeMs();
+                const resetTime = new Date(Date.now() + allWaitMs).toISOString();
+
+                // If wait time is too long (> 2 minutes), throw error immediately
+                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                    throw new Error(
+                        `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
+                    );
+                }
+
+                // Wait for reset (applies to both single and multi-account modes)
+                const accountCount = accountManager.getAccountCount();
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
+                await sleep(allWaitMs);
+                accountManager.clearExpiredLimits();
+                account = accountManager.pickNext();
+            }
+
+            if (!account) {
+                throw new Error('No accounts available');
+            }
+        }
+
+        try {
+            // Get token and project for this account
+            const token = await accountManager.getTokenForAccount(account);
+            const project = await accountManager.getProjectForAccount(account, token);
+            const payload = buildCloudCodeRequest(anthropicRequest, project);
+
+            logger.debug(`[CloudCode] Starting stream for model: ${model}`);
+
+            // Try each endpoint for streaming
+            let lastError = null;
+            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
+                try {
+                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
+
+                    const response = await fetch(url, {
+                        method: 'POST',
+                        headers: buildHeaders(token, model, 'text/event-stream'),
+                        body: JSON.stringify(payload)
+                    });
+
+                    if (!response.ok) {
+                        const errorText = await response.text();
+                        logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
+
+                        if (response.status === 401) {
+                            // Auth error - clear caches and retry
+                            accountManager.clearTokenCache(account.email);
+                            accountManager.clearProjectCache(account.email);
+                            continue;
+                        }
+
+                        if (response.status === 429) {
+                            // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
+                            logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
+                            const resetMs = parseResetTime(response, errorText);
+                            // Keep minimum reset time across all 429 responses
+                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
+                                lastError = { is429: true, response, errorText, resetMs };
+                            }
+                            continue;
+                        }
+
+                        lastError = new Error(`API error ${response.status}: ${errorText}`);
+
+                        // If it's a 5xx error, wait a bit before trying the next endpoint
+                        if (response.status >= 500) {
+                            logger.warn(`[CloudCode] ${response.status} stream error, waiting 1s before retry...`);
+                            await sleep(1000);
+                        }
+
+                        continue;
+                    }
+
+                    // Stream the response - yield events as they arrive
+                    yield* streamSSEResponse(response, anthropicRequest.model);
+
+                    logger.debug('[CloudCode] Stream completed');
+                    return;
+
+                } catch (endpointError) {
+                    if (is429Error(endpointError)) {
+                        throw endpointError; // Re-throw to trigger account switch
+                    }
+                    logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
+                    lastError = endpointError;
+                }
+            }
+
+            // If all endpoints failed for this account
+            if (lastError) {
+                // If all endpoints returned 429, mark account as rate-limited
+                if (lastError.is429) {
+                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
+                    accountManager.markRateLimited(account.email, lastError.resetMs);
+                    throw new Error(`Rate limited: ${lastError.errorText}`);
+                }
+                throw lastError;
+            }
+
+        } catch (error) {
+            if (is429Error(error)) {
+                // Rate limited - already marked, continue to next account
+                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
+                continue;
+            }
+            if (isAuthInvalidError(error)) {
+                // Auth invalid - already marked, continue to next account
+                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
+                continue;
+            }
+            // Non-rate-limit error: throw immediately
+            // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
+            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
+                logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
+                accountManager.pickNext(); // Force advance to next account
+                continue;
+            }
+
+            throw error;
+        }
+    }
+
+    throw new Error('Max retries exceeded');
+}