diff --git a/CLAUDE.md b/CLAUDE.md index c608e84..2d4d102 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,8 +54,9 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav - `request-converter.js` - Anthropic → Google request conversion - `response-converter.js` - Google → Anthropic response conversion - `content-converter.js` - Message content and role conversion - - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility - - `thinking-utils.js` - Thinking block validation, filtering, and reordering + - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility (preserves constraints/enums as hints) + - `thinking-utils.js` - Thinking block validation, filtering, reordering, and recovery logic + - `signature-cache.js` - In-memory cache for Gemini thoughtSignatures - **src/account-manager.js**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown - **src/oauth.js**: Google OAuth implementation for adding accounts - **src/token-extractor.js**: Extracts tokens from local Antigravity app installation (legacy single-account mode) @@ -94,8 +95,8 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav **Model Family Handling:** - `getModelFamily(model)` returns `'claude'` or `'gemini'` based on model name - Claude models use `signature` field on thinking blocks -- Gemini models use `thoughtSignature` field on functionCall parts -- When Claude Code strips `thoughtSignature`, the proxy uses Google's `skip_thought_signature_validator` sentinel value +- Gemini models use `thoughtSignature` field on functionCall parts (cached or sentinel value) +- When Claude Code strips `thoughtSignature`, the proxy tries to restore from cache, then falls back to `skip_thought_signature_validator` **Error Handling:** Use custom error classes from `src/errors.js`: - `RateLimitError` - 429/RESOURCE_EXHAUSTED errors diff --git a/src/cloudcode-client.js b/src/cloudcode-client.js index 89ae9e2..35ec3e6 100644 --- a/src/cloudcode-client.js +++ b/src/cloudcode-client.js @@ -23,6 +23,7 @@ import { convertAnthropicToGoogle, convertGoogleToAnthropic } from './format/index.js'; +import { cacheSignature } from './format/signature-cache.js'; import { formatDuration, sleep } from './utils/helpers.js'; import { isRateLimitError, isAuthError } from './errors.js'; @@ -848,6 +849,8 @@ async function* streamSSEResponse(response, originalModel) { // Store the signature in the tool_use block for later retrieval if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) { toolUseBlock.thoughtSignature = functionCallSignature; + // Cache for future requests (Claude Code may strip this field) + cacheSignature(toolId, functionCallSignature); } yield { diff --git a/src/constants.js b/src/constants.js index 8806531..d957923 100644 --- a/src/constants.js +++ b/src/constants.js @@ -87,6 +87,14 @@ export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature leng // Gemini-specific limits export const GEMINI_MAX_OUTPUT_TOKENS = 16384; +// Gemini signature handling +// Sentinel value to skip thought signature validation when Claude Code strips the field +// See: https://ai.google.dev/gemini-api/docs/thought-signatures +export const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator'; + +// Cache TTL for Gemini thoughtSignatures (2 hours) +export const GEMINI_SIGNATURE_CACHE_TTL_MS = 2 * 60 * 60 * 1000; + /** * Get the model family from model name (dynamic detection, no hardcoded list). * @param {string} modelName - The model name from the request @@ -152,6 +160,8 @@ export default { MAX_WAIT_BEFORE_ERROR_MS, MIN_SIGNATURE_LENGTH, GEMINI_MAX_OUTPUT_TOKENS, + GEMINI_SKIP_SIGNATURE, + GEMINI_SIGNATURE_CACHE_TTL_MS, getModelFamily, isThinkingModel, OAUTH_CONFIG, diff --git a/src/format/content-converter.js b/src/format/content-converter.js index d8a1f87..a0d7a61 100644 --- a/src/format/content-converter.js +++ b/src/format/content-converter.js @@ -3,15 +3,8 @@ * Converts Anthropic message content to Google Generative AI parts format */ -import { MIN_SIGNATURE_LENGTH } from '../constants.js'; - -/** - * Sentinel value to skip thought signature validation for Gemini models. - * Per Google documentation, this value can be used when Claude Code strips - * the thoughtSignature field from tool_use blocks in multi-turn requests. - * See: https://ai.google.dev/gemini-api/docs/thought-signatures - */ -const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator'; +import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js'; +import { getCachedSignature } from './signature-cache.js'; /** * Convert Anthropic role to Google role @@ -102,10 +95,17 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo // For Gemini models, include thoughtSignature at the part level // This is required by Gemini 3+ for tool calls to work correctly if (isGeminiModel) { - // Use thoughtSignature from the block if Claude Code preserved it - // Otherwise, use the sentinel value to skip validation (Claude Code strips non-standard fields) - // See: https://ai.google.dev/gemini-api/docs/thought-signatures - part.thoughtSignature = block.thoughtSignature || GEMINI_SKIP_SIGNATURE; + // Priority: block.thoughtSignature > cache > GEMINI_SKIP_SIGNATURE + let signature = block.thoughtSignature; + + if (!signature && block.id) { + signature = getCachedSignature(block.id); + if (signature) { + console.log('[ContentConverter] Restored signature from cache for:', block.id); + } + } + + part.thoughtSignature = signature || GEMINI_SKIP_SIGNATURE; } parts.push(part); diff --git a/src/format/request-converter.js b/src/format/request-converter.js index 1a0e7e8..7920ff2 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -14,7 +14,9 @@ import { restoreThinkingSignatures, removeTrailingThinkingBlocks, reorderAssistantContent, - filterUnsignedThinkingBlocks + filterUnsignedThinkingBlocks, + needsThinkingRecovery, + closeToolLoopForThinking } from './thinking-utils.js'; /** @@ -74,9 +76,18 @@ export function convertAnthropicToGoogle(anthropicRequest) { } } + // Apply thinking recovery for Gemini thinking models when needed + // This handles corrupted tool loops where thinking blocks are stripped + // Claude models handle this differently and don't need this recovery + let processedMessages = messages; + if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) { + console.log('[RequestConverter] Applying thinking recovery for Gemini'); + processedMessages = closeToolLoopForThinking(messages); + } + // Convert messages to contents, then filter unsigned thinking blocks - for (let i = 0; i < messages.length; i++) { - const msg = messages[i]; + for (let i = 0; i < processedMessages.length; i++) { + const msg = processedMessages[i]; let msgContent = msg.content; // For assistant messages, process thinking blocks and reorder content @@ -90,6 +101,14 @@ export function convertAnthropicToGoogle(anthropicRequest) { } const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel); + + // SAFETY: Google API requires at least one part per content message + // This happens when all thinking blocks are filtered out (unsigned) + if (parts.length === 0) { + console.log('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder'); + parts.push({ text: '' }); + } + const content = { role: convertRole(msg.role), parts: parts diff --git a/src/format/response-converter.js b/src/format/response-converter.js index 53ba447..59b919b 100644 --- a/src/format/response-converter.js +++ b/src/format/response-converter.js @@ -5,6 +5,7 @@ import crypto from 'crypto'; import { MIN_SIGNATURE_LENGTH } from '../constants.js'; +import { cacheSignature } from './signature-cache.js'; /** * Convert Google Generative AI response to Anthropic Messages API format @@ -58,6 +59,8 @@ export function convertGoogleToAnthropic(googleResponse, model) { // For Gemini 3+, include thoughtSignature from the part level if (part.thoughtSignature && part.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) { toolUseBlock.thoughtSignature = part.thoughtSignature; + // Cache for future requests (Claude Code may strip this field) + cacheSignature(toolId, part.thoughtSignature); } anthropicContent.push(toolUseBlock); diff --git a/src/format/schema-sanitizer.js b/src/format/schema-sanitizer.js index 733e58e..efe9a0d 100644 --- a/src/format/schema-sanitizer.js +++ b/src/format/schema-sanitizer.js @@ -289,6 +289,127 @@ function flattenAnyOfOneOf(schema) { return result; } +// ============================================================================ +// Enhanced Schema Hints (for preserving semantic information) +// ============================================================================ + +/** + * Add hints for enum values (if ≤10 values). + * This preserves enum information in the description since Gemini + * may not fully support enums in all cases. + * + * @param {Object} schema - Schema to process + * @returns {Object} Schema with enum hints added to description + */ +function addEnumHints(schema) { + if (!schema || typeof schema !== 'object') return schema; + if (Array.isArray(schema)) return schema.map(addEnumHints); + + let result = { ...schema }; + + // Add enum hint if present and reasonable size + if (Array.isArray(result.enum) && result.enum.length > 1 && result.enum.length <= 10) { + const vals = result.enum.map(v => String(v)).join(', '); + result = appendDescriptionHint(result, `Allowed: ${vals}`); + } + + // Recursively process properties + if (result.properties && typeof result.properties === 'object') { + const newProps = {}; + for (const [key, value] of Object.entries(result.properties)) { + newProps[key] = addEnumHints(value); + } + result.properties = newProps; + } + + // Recursively process items + if (result.items) { + result.items = Array.isArray(result.items) + ? result.items.map(addEnumHints) + : addEnumHints(result.items); + } + + return result; +} + +/** + * Add hints for additionalProperties: false. + * This informs the model that extra properties are not allowed. + * + * @param {Object} schema - Schema to process + * @returns {Object} Schema with additionalProperties hints added + */ +function addAdditionalPropertiesHints(schema) { + if (!schema || typeof schema !== 'object') return schema; + if (Array.isArray(schema)) return schema.map(addAdditionalPropertiesHints); + + let result = { ...schema }; + + if (result.additionalProperties === false) { + result = appendDescriptionHint(result, 'No extra properties allowed'); + } + + // Recursively process properties + if (result.properties && typeof result.properties === 'object') { + const newProps = {}; + for (const [key, value] of Object.entries(result.properties)) { + newProps[key] = addAdditionalPropertiesHints(value); + } + result.properties = newProps; + } + + // Recursively process items + if (result.items) { + result.items = Array.isArray(result.items) + ? result.items.map(addAdditionalPropertiesHints) + : addAdditionalPropertiesHints(result.items); + } + + return result; +} + +/** + * Move unsupported constraints to description hints. + * This preserves constraint information that would otherwise be lost + * when we strip unsupported keywords. + * + * @param {Object} schema - Schema to process + * @returns {Object} Schema with constraint hints added to description + */ +function moveConstraintsToDescription(schema) { + if (!schema || typeof schema !== 'object') return schema; + if (Array.isArray(schema)) return schema.map(moveConstraintsToDescription); + + const CONSTRAINTS = ['minLength', 'maxLength', 'pattern', 'minimum', 'maximum', + 'minItems', 'maxItems', 'format']; + + let result = { ...schema }; + + for (const constraint of CONSTRAINTS) { + if (result[constraint] !== undefined && typeof result[constraint] !== 'object') { + result = appendDescriptionHint(result, `${constraint}: ${result[constraint]}`); + } + } + + // Recursively process properties + if (result.properties && typeof result.properties === 'object') { + const newProps = {}; + for (const [key, value] of Object.entries(result.properties)) { + newProps[key] = moveConstraintsToDescription(value); + } + result.properties = newProps; + } + + // Recursively process items + if (result.items) { + result.items = Array.isArray(result.items) + ? result.items.map(moveConstraintsToDescription) + : moveConstraintsToDescription(result.items); + } + + return result; +} + /** * Flatten array type fields and track nullable properties. * Converts { type: ["string", "null"] } to { type: "string" } with nullable hint. @@ -457,6 +578,15 @@ export function cleanSchemaForGemini(schema) { // Phase 1: Convert $refs to hints let result = convertRefsToHints(schema); + // Phase 1b: Add enum hints (preserves enum info in description) + result = addEnumHints(result); + + // Phase 1c: Add additionalProperties hints + result = addAdditionalPropertiesHints(result); + + // Phase 1d: Move constraints to description (before they get stripped) + result = moveConstraintsToDescription(result); + // Phase 2a: Merge allOf schemas result = mergeAllOf(result); diff --git a/src/format/signature-cache.js b/src/format/signature-cache.js new file mode 100644 index 0000000..49154ee --- /dev/null +++ b/src/format/signature-cache.js @@ -0,0 +1,65 @@ +/** + * Signature Cache + * In-memory cache for Gemini thoughtSignatures + * + * Gemini models require thoughtSignature on tool calls, but Claude Code + * strips non-standard fields. This cache stores signatures by tool_use_id + * so they can be restored in subsequent requests. + */ + +import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js'; + +const signatureCache = new Map(); + +/** + * Store a signature for a tool_use_id + * @param {string} toolUseId - The tool use ID + * @param {string} signature - The thoughtSignature to cache + */ +export function cacheSignature(toolUseId, signature) { + if (!toolUseId || !signature) return; + signatureCache.set(toolUseId, { + signature, + timestamp: Date.now() + }); +} + +/** + * Get a cached signature for a tool_use_id + * @param {string} toolUseId - The tool use ID + * @returns {string|null} The cached signature or null if not found/expired + */ +export function getCachedSignature(toolUseId) { + if (!toolUseId) return null; + const entry = signatureCache.get(toolUseId); + if (!entry) return null; + + // Check TTL + if (Date.now() - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) { + signatureCache.delete(toolUseId); + return null; + } + + return entry.signature; +} + +/** + * Clear expired entries from the cache + * Can be called periodically to prevent memory buildup + */ +export function cleanupCache() { + const now = Date.now(); + for (const [key, entry] of signatureCache) { + if (now - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) { + signatureCache.delete(key); + } + } +} + +/** + * Get the current cache size (for debugging) + * @returns {number} Number of entries in the cache + */ +export function getCacheSize() { + return signatureCache.size; +} diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 9429c3e..44cef1e 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -257,3 +257,225 @@ export function reorderAssistantContent(content) { return reordered; } + +// ============================================================================ +// Thinking Recovery Functions +// ============================================================================ + +/** + * Check if a message has any VALID (signed) thinking blocks. + * Only counts thinking blocks that have valid signatures, not unsigned ones + * that will be dropped later. + * + * @param {Object} message - Message to check + * @returns {boolean} True if message has valid signed thinking blocks + */ +function messageHasValidThinking(message) { + const content = message.content || message.parts || []; + if (!Array.isArray(content)) return false; + return content.some(block => { + if (!isThinkingPart(block)) return false; + // Check for valid signature (Anthropic style) + if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) return true; + // Check for thoughtSignature (Gemini style on functionCall) + if (block.thoughtSignature && block.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) return true; + return false; + }); +} + +/** + * Check if a message has tool_use blocks + * @param {Object} message - Message to check + * @returns {boolean} True if message has tool_use blocks + */ +function messageHasToolUse(message) { + const content = message.content || message.parts || []; + if (!Array.isArray(content)) return false; + return content.some(block => + block.type === 'tool_use' || block.functionCall + ); +} + +/** + * Check if a message has tool_result blocks + * @param {Object} message - Message to check + * @returns {boolean} True if message has tool_result blocks + */ +function messageHasToolResult(message) { + const content = message.content || message.parts || []; + if (!Array.isArray(content)) return false; + return content.some(block => + block.type === 'tool_result' || block.functionResponse + ); +} + +/** + * Check if message is a plain user text message (not tool_result) + * @param {Object} message - Message to check + * @returns {boolean} True if message is plain user text + */ +function isPlainUserMessage(message) { + if (message.role !== 'user') return false; + const content = message.content || message.parts || []; + if (!Array.isArray(content)) return typeof content === 'string'; + // Check if it has tool_result blocks + return !content.some(block => + block.type === 'tool_result' || block.functionResponse + ); +} + +/** + * Analyze conversation state to detect if we're in a corrupted state. + * This includes: + * 1. Tool loop: assistant has tool_use followed by tool_results (normal flow) + * 2. Interrupted tool: assistant has tool_use followed by plain user message (interrupted) + * + * @param {Array} messages - Array of messages + * @returns {Object} State object with inToolLoop, interruptedTool, turnHasThinking, etc. + */ +export function analyzeConversationState(messages) { + if (!Array.isArray(messages) || messages.length === 0) { + return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 }; + } + + // Find the last assistant message + let lastAssistantIdx = -1; + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i].role === 'assistant' || messages[i].role === 'model') { + lastAssistantIdx = i; + break; + } + } + + if (lastAssistantIdx === -1) { + return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 }; + } + + const lastAssistant = messages[lastAssistantIdx]; + const hasToolUse = messageHasToolUse(lastAssistant); + const hasThinking = messageHasValidThinking(lastAssistant); + + // Count trailing tool results after the assistant message + let toolResultCount = 0; + let hasPlainUserMessageAfter = false; + for (let i = lastAssistantIdx + 1; i < messages.length; i++) { + if (messageHasToolResult(messages[i])) { + toolResultCount++; + } + if (isPlainUserMessage(messages[i])) { + hasPlainUserMessageAfter = true; + } + } + + // We're in a tool loop if: assistant has tool_use AND there are tool_results after + const inToolLoop = hasToolUse && toolResultCount > 0; + + // We have an interrupted tool if: assistant has tool_use, NO tool_results, + // but there IS a plain user message after (user interrupted and sent new message) + const interruptedTool = hasToolUse && toolResultCount === 0 && hasPlainUserMessageAfter; + + return { + inToolLoop, + interruptedTool, + turnHasThinking: hasThinking, + toolResultCount, + lastAssistantIdx + }; +} + +/** + * Check if conversation needs thinking recovery. + * Returns true when: + * 1. We're in a tool loop but have no valid thinking blocks, OR + * 2. We have an interrupted tool with no valid thinking blocks + * + * @param {Array} messages - Array of messages + * @returns {boolean} True if thinking recovery is needed + */ +export function needsThinkingRecovery(messages) { + const state = analyzeConversationState(messages); + // Need recovery if (tool loop OR interrupted tool) AND no thinking + return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking; +} + +/** + * Strip all thinking blocks from messages. + * Used before injecting synthetic messages for recovery. + * + * @param {Array} messages - Array of messages + * @returns {Array} Messages with all thinking blocks removed + */ +function stripAllThinkingBlocks(messages) { + return messages.map(msg => { + const content = msg.content || msg.parts; + if (!Array.isArray(content)) return msg; + + const filtered = content.filter(block => !isThinkingPart(block)); + + if (msg.content) { + return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '' }] }; + } else if (msg.parts) { + return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '' }] }; + } + return msg; + }); +} + +/** + * Close tool loop by injecting synthetic messages. + * This allows the model to start a fresh turn when thinking is corrupted. + * + * When thinking blocks are stripped (no valid signatures) and we're in the + * middle of a tool loop OR have an interrupted tool, the conversation is in + * a corrupted state. This function injects synthetic messages to close the + * loop and allow the model to continue. + * + * @param {Array} messages - Array of messages + * @returns {Array} Modified messages with synthetic messages injected + */ +export function closeToolLoopForThinking(messages) { + const state = analyzeConversationState(messages); + + // Handle neither tool loop nor interrupted tool + if (!state.inToolLoop && !state.interruptedTool) return messages; + + // Strip all thinking blocks + let modified = stripAllThinkingBlocks(messages); + + if (state.interruptedTool) { + // For interrupted tools: just strip thinking and add a synthetic assistant message + // to acknowledge the interruption before the user's new message + + // Find where to insert the synthetic message (before the plain user message) + const insertIdx = state.lastAssistantIdx + 1; + + // Insert synthetic assistant message acknowledging interruption + modified.splice(insertIdx, 0, { + role: 'assistant', + content: [{ type: 'text', text: '[Tool call was interrupted.]' }] + }); + + console.log('[ThinkingUtils] Applied thinking recovery for interrupted tool'); + } else { + // For tool loops: add synthetic messages to close the loop + const syntheticText = state.toolResultCount === 1 + ? '[Tool execution completed.]' + : `[${state.toolResultCount} tool executions completed.]`; + + // Inject synthetic model message to complete the turn + modified.push({ + role: 'assistant', + content: [{ type: 'text', text: syntheticText }] + }); + + // Inject synthetic user message to start fresh + modified.push({ + role: 'user', + content: [{ type: 'text', text: '[Continue]' }] + }); + + console.log('[ThinkingUtils] Applied thinking recovery for tool loop'); + } + + return modified; +}