Implement Gemini signature caching and thinking recovery

- Add in-memory signature cache to restore thoughtSignatures stripped by Claude Code
- Implement thinking recovery logic to handle interrupted tool loops for Gemini
- Enhance schema sanitizer to preserve constraints and enums as description hints
- Update CLAUDE.md with new architecture details

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-28 14:34:03 +05:30
parent 1eb2329f7c
commit 426acc494a
9 changed files with 473 additions and 20 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -54,8 +54,9 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
  - `request-converter.js` - Anthropic → Google request conversion
  - `response-converter.js` - Google → Anthropic response conversion
  - `content-converter.js` - Message content and role conversion
-  - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility
+  - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility (preserves constraints/enums as hints)
-  - `thinking-utils.js` - Thinking block validation, filtering, and reordering
+  - `thinking-utils.js` - Thinking block validation, filtering, reordering, and recovery logic
  - `signature-cache.js` - In-memory cache for Gemini thoughtSignatures
 - **src/account-manager.js**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown
 - **src/oauth.js**: Google OAuth implementation for adding accounts
 - **src/token-extractor.js**: Extracts tokens from local Antigravity app installation (legacy single-account mode)
@@ -94,8 +95,8 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
 **Model Family Handling:**
 - `getModelFamily(model)` returns `'claude'` or `'gemini'` based on model name
 - Claude models use `signature` field on thinking blocks
- Gemini models use `thoughtSignature` field on functionCall parts
+- Gemini models use `thoughtSignature` field on functionCall parts (cached or sentinel value)
- When Claude Code strips `thoughtSignature`, the proxy uses Google's `skip_thought_signature_validator` sentinel value
+- When Claude Code strips `thoughtSignature`, the proxy tries to restore from cache, then falls back to `skip_thought_signature_validator`
 **Error Handling:** Use custom error classes from `src/errors.js`:
 - `RateLimitError` - 429/RESOURCE_EXHAUSTED errors
--- a/src/cloudcode-client.js
+++ b/src/cloudcode-client.js
@@ -23,6 +23,7 @@ import {
    convertAnthropicToGoogle,
    convertGoogleToAnthropic
 } from './format/index.js';
 import { cacheSignature } from './format/signature-cache.js';
 import { formatDuration, sleep } from './utils/helpers.js';
 import { isRateLimitError, isAuthError } from './errors.js';
@@ -848,6 +849,8 @@ async function* streamSSEResponse(response, originalModel) {
                        // Store the signature in the tool_use block for later retrieval
                        if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) {
                            toolUseBlock.thoughtSignature = functionCallSignature;
                            // Cache for future requests (Claude Code may strip this field)
                            cacheSignature(toolId, functionCallSignature);
                        }
                        yield {
--- a/src/constants.js
+++ b/src/constants.js
@@ -87,6 +87,14 @@ export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature leng
 // Gemini-specific limits
 export const GEMINI_MAX_OUTPUT_TOKENS = 16384;
 // Gemini signature handling
 // Sentinel value to skip thought signature validation when Claude Code strips the field.
 // content-converter.js sends it as `thoughtSignature` only after both the tool_use block
 // and the signature cache fail to provide a real signature.
 // See: https://ai.google.dev/gemini-api/docs/thought-signatures
 export const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
 // Cache TTL for Gemini thoughtSignatures, in milliseconds (2 hours).
 // signature-cache.js treats entries older than this as expired.
 export const GEMINI_SIGNATURE_CACHE_TTL_MS = 2 * 60 * 60 * 1000;
 /**
 * Get the model family from model name (dynamic detection, no hardcoded list).
 * @param {string} modelName - The model name from the request
@@ -152,6 +160,8 @@ export default {
    MAX_WAIT_BEFORE_ERROR_MS,
    MIN_SIGNATURE_LENGTH,
    GEMINI_MAX_OUTPUT_TOKENS,
    GEMINI_SKIP_SIGNATURE,
    GEMINI_SIGNATURE_CACHE_TTL_MS,
    getModelFamily,
    isThinkingModel,
    OAUTH_CONFIG,
--- a/src/format/content-converter.js
+++ b/src/format/content-converter.js
@@ -3,15 +3,8 @@
 * Converts Anthropic message content to Google Generative AI parts format
 */
-import { MIN_SIGNATURE_LENGTH } from '../constants.js';
+import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
-
+import { getCachedSignature } from './signature-cache.js';
 /**
 * Sentinel value to skip thought signature validation for Gemini models.
 * Per Google documentation, this value can be used when Claude Code strips
 * the thoughtSignature field from tool_use blocks in multi-turn requests.
 * See: https://ai.google.dev/gemini-api/docs/thought-signatures
 */
 const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
 /**
 * Convert Anthropic role to Google role
@@ -102,10 +95,17 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
            // For Gemini models, include thoughtSignature at the part level
            // This is required by Gemini 3+ for tool calls to work correctly
            if (isGeminiModel) {
-                // Use thoughtSignature from the block if Claude Code preserved it
+                // Priority: block.thoughtSignature > cache > GEMINI_SKIP_SIGNATURE
-                // Otherwise, use the sentinel value to skip validation (Claude Code strips non-standard fields)
+                let signature = block.thoughtSignature;
-                // See: https://ai.google.dev/gemini-api/docs/thought-signatures
+
-                part.thoughtSignature = block.thoughtSignature || GEMINI_SKIP_SIGNATURE;
+                if (!signature && block.id) {
                    signature = getCachedSignature(block.id);
                    if (signature) {
                        console.log('[ContentConverter] Restored signature from cache for:', block.id);
                    }
                }
                part.thoughtSignature = signature || GEMINI_SKIP_SIGNATURE;
            }
            parts.push(part);
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -14,7 +14,9 @@ import {
    restoreThinkingSignatures,
    removeTrailingThinkingBlocks,
    reorderAssistantContent,
-    filterUnsignedThinkingBlocks
+    filterUnsignedThinkingBlocks,
    needsThinkingRecovery,
    closeToolLoopForThinking
 } from './thinking-utils.js';
 /**
@@ -74,9 +76,18 @@ export function convertAnthropicToGoogle(anthropicRequest) {
        }
    }
    // Apply thinking recovery for Gemini thinking models when needed
    // This handles corrupted tool loops where thinking blocks are stripped
    // Claude models handle this differently and don't need this recovery
    let processedMessages = messages;
    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
        console.log('[RequestConverter] Applying thinking recovery for Gemini');
        processedMessages = closeToolLoopForThinking(messages);
    }
    // Convert messages to contents, then filter unsigned thinking blocks
-    for (let i = 0; i < messages.length; i++) {
+    for (let i = 0; i < processedMessages.length; i++) {
-        const msg = messages[i];
+        const msg = processedMessages[i];
        let msgContent = msg.content;
        // For assistant messages, process thinking blocks and reorder content
@@ -90,6 +101,14 @@ export function convertAnthropicToGoogle(anthropicRequest) {
        }
        const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel);
        // SAFETY: Google API requires at least one part per content message
        // This happens when all thinking blocks are filtered out (unsigned)
        if (parts.length === 0) {
            console.log('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
            parts.push({ text: '' });
        }
        const content = {
            role: convertRole(msg.role),
            parts: parts
--- a/src/format/response-converter.js
+++ b/src/format/response-converter.js
@@ -5,6 +5,7 @@
 import crypto from 'crypto';
 import { MIN_SIGNATURE_LENGTH } from '../constants.js';
 import { cacheSignature } from './signature-cache.js';
 /**
 * Convert Google Generative AI response to Anthropic Messages API format
@@ -58,6 +59,8 @@ export function convertGoogleToAnthropic(googleResponse, model) {
            // For Gemini 3+, include thoughtSignature from the part level
            if (part.thoughtSignature && part.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) {
                toolUseBlock.thoughtSignature = part.thoughtSignature;
                // Cache for future requests (Claude Code may strip this field)
                cacheSignature(toolId, part.thoughtSignature);
            }
            anthropicContent.push(toolUseBlock);
--- a/src/format/schema-sanitizer.js
+++ b/src/format/schema-sanitizer.js
@@ -289,6 +289,127 @@ function flattenAnyOfOneOf(schema) {
    return result;
 }
 // ============================================================================
 // Enhanced Schema Hints (for preserving semantic information)
 // ============================================================================
 /**
 * Surface small enum value lists as description hints.
 *
 * Walks the schema, including `properties` and `items`, and for every node
 * whose `enum` holds between 2 and 10 values appends an "Allowed: ..." hint
 * through appendDescriptionHint. The `enum` keyword itself is left in place.
 * Each visited object is shallow-copied before it is changed.
 *
 * @param {Object} schema - Schema (or array of schemas) to process
 * @returns {Object} Copy of the schema with enum hints added to descriptions;
 *   non-object values are returned unchanged
 */
 function addEnumHints(schema) {
    if (Array.isArray(schema)) {
        return schema.map((entry) => addEnumHints(entry));
    }
    if (schema === null || typeof schema !== 'object') {
        return schema;
    }
    let annotated = { ...schema };
    // Single-value enums and lists longer than 10 values get no hint
    const choices = annotated.enum;
    const choiceCount = Array.isArray(choices) ? choices.length : 0;
    if (choiceCount > 1 && choiceCount <= 10) {
        annotated = appendDescriptionHint(annotated, `Allowed: ${choices.map(String).join(', ')}`);
    }
    // Descend into named properties
    const childProps = annotated.properties;
    if (childProps && typeof childProps === 'object') {
        const rebuilt = {};
        Object.keys(childProps).forEach((name) => {
            rebuilt[name] = addEnumHints(childProps[name]);
        });
        annotated.properties = rebuilt;
    }
    // Descend into items; the array (tuple) form is mapped by the guard at the top
    if (annotated.items) {
        annotated.items = addEnumHints(annotated.items);
    }
    return annotated;
 }
 /**
 * Record `additionalProperties: false` as a description hint.
 *
 * Walks the schema, including `properties` and `items`, and appends
 * "No extra properties allowed" through appendDescriptionHint on every node
 * that sets `additionalProperties` to exactly `false`. Other values of the
 * keyword produce no hint. Each visited object is shallow-copied first.
 *
 * @param {Object} schema - Schema (or array of schemas) to process
 * @returns {Object} Copy of the schema with additionalProperties hints added;
 *   non-object values are returned unchanged
 */
 function addAdditionalPropertiesHints(schema) {
    if (Array.isArray(schema)) {
        return schema.map((entry) => addAdditionalPropertiesHints(entry));
    }
    if (schema === null || typeof schema !== 'object') {
        return schema;
    }
    const isClosed = schema.additionalProperties === false;
    const annotated = isClosed
        ? appendDescriptionHint({ ...schema }, 'No extra properties allowed')
        : { ...schema };
    // Descend into named properties
    const childProps = annotated.properties;
    if (childProps && typeof childProps === 'object') {
        const rebuilt = {};
        Object.keys(childProps).forEach((name) => {
            rebuilt[name] = addAdditionalPropertiesHints(childProps[name]);
        });
        annotated.properties = rebuilt;
    }
    // Descend into items; the array (tuple) form is mapped by the guard at the top
    if (annotated.items) {
        annotated.items = addAdditionalPropertiesHints(annotated.items);
    }
    return annotated;
 }
 /**
 * Copy validation constraints into description hints.
 *
 * For each of minLength, maxLength, pattern, minimum, maximum, minItems,
 * maxItems and format (checked in that order), a defined, non-object value is
 * appended to the description as "<keyword>: <value>" through
 * appendDescriptionHint. The keywords themselves are not removed here; the
 * hint only keeps the information readable if they are stripped later.
 * Recurses into `properties` and `items`, shallow-copying each visited object.
 *
 * @param {Object} schema - Schema (or array of schemas) to process
 * @returns {Object} Copy of the schema with constraint hints added to descriptions;
 *   non-object values are returned unchanged
 */
 function moveConstraintsToDescription(schema) {
    if (Array.isArray(schema)) {
        return schema.map((entry) => moveConstraintsToDescription(entry));
    }
    if (schema === null || typeof schema !== 'object') {
        return schema;
    }
    const HINTED_KEYWORDS = [
        'minLength', 'maxLength', 'pattern', 'minimum',
        'maximum', 'minItems', 'maxItems', 'format'
    ];
    let annotated = { ...schema };
    for (const keyword of HINTED_KEYWORDS) {
        const limit = annotated[keyword];
        // Skip absent keywords and object-valued ones (typeof null is 'object' too)
        if (limit === undefined || typeof limit === 'object') continue;
        annotated = appendDescriptionHint(annotated, `${keyword}: ${limit}`);
    }
    // Descend into named properties
    const childProps = annotated.properties;
    if (childProps && typeof childProps === 'object') {
        const rebuilt = {};
        Object.keys(childProps).forEach((name) => {
            rebuilt[name] = moveConstraintsToDescription(childProps[name]);
        });
        annotated.properties = rebuilt;
    }
    // Descend into items; the array (tuple) form is mapped by the guard at the top
    if (annotated.items) {
        annotated.items = moveConstraintsToDescription(annotated.items);
    }
    return annotated;
 }
 /**
 * Flatten array type fields and track nullable properties.
 * Converts { type: ["string", "null"] } to { type: "string" } with nullable hint.
@@ -457,6 +578,15 @@ export function cleanSchemaForGemini(schema) {
    // Phase 1: Convert $refs to hints
    let result = convertRefsToHints(schema);
    // Phase 1b: Add enum hints (preserves enum info in description)
    result = addEnumHints(result);
    // Phase 1c: Add additionalProperties hints
    result = addAdditionalPropertiesHints(result);
    // Phase 1d: Move constraints to description (before they get stripped)
    result = moveConstraintsToDescription(result);
    // Phase 2a: Merge allOf schemas
    result = mergeAllOf(result);
--- a/src/format/signature-cache.js
+++ b/src/format/signature-cache.js
@@ -0,0 +1,65 @@
 /**
 * Signature Cache
 * In-memory cache for Gemini thoughtSignatures
 *
 * Gemini models require thoughtSignature on tool calls, but Claude Code
 * strips non-standard fields. This cache stores signatures by tool_use_id
 * so they can be restored in subsequent requests.
 */
 import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
 const signatureCache = new Map();
 /**
 * Store a signature for a tool_use_id and evict entries that have expired.
 *
 * Without eviction on write the cache only shrinks when an expired id is
 * looked up again or when cleanupCache() is called explicitly, so a
 * long-running process would accumulate one entry per tool call forever.
 *
 * Eviction is cheap because of how entries are written: the key is deleted
 * before being set again, so the Map's insertion order always matches
 * timestamp order and expired entries can only sit at the front.
 *
 * @param {string} toolUseId - The tool use ID (ignored when empty)
 * @param {string} signature - The thoughtSignature to cache (ignored when empty)
 */
 export function cacheSignature(toolUseId, signature) {
    if (!toolUseId || !signature) return;
    const now = Date.now();
    // Re-insert instead of overwriting: Map.set on an existing key keeps its old position
    signatureCache.delete(toolUseId);
    signatureCache.set(toolUseId, {
        signature,
        timestamp: now
    });
    // Drop expired entries from the oldest end; stop at the first live one
    for (const [key, entry] of signatureCache) {
        if (now - entry.timestamp <= GEMINI_SIGNATURE_CACHE_TTL_MS) break;
        signatureCache.delete(key);
    }
 }
 /**
 * Look up the signature cached for a tool_use_id.
 * An entry older than GEMINI_SIGNATURE_CACHE_TTL_MS is removed from the cache
 * during the lookup and reported as a miss.
 *
 * @param {string} toolUseId - The tool use ID
 * @returns {string|null} The cached signature, or null when the id is empty,
 *   unknown, or its entry has expired
 */
 export function getCachedSignature(toolUseId) {
    const cached = toolUseId ? signatureCache.get(toolUseId) : undefined;
    if (!cached) {
        return null;
    }
    const ageMs = Date.now() - cached.timestamp;
    if (ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS) {
        // Expired: evict now so the stale entry does not linger
        signatureCache.delete(toolUseId);
        return null;
    }
    return cached.signature;
 }
 /**
 * Sweep the whole cache and delete every entry older than
 * GEMINI_SIGNATURE_CACHE_TTL_MS. Intended to be invoked periodically so
 * entries that are never looked up again do not pile up in memory.
 */
 export function cleanupCache() {
    const sweepTime = Date.now();
    // Deleting the entry currently being visited is safe during Map iteration
    signatureCache.forEach((entry, key) => {
        const ageMs = sweepTime - entry.timestamp;
        if (ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS) {
            signatureCache.delete(key);
        }
    });
 }
 /**
 * Report how many entries the signature cache currently holds (for debugging).
 * Expired entries that have not been evicted yet are included in the count.
 * @returns {number} Number of entries in the cache
 */
 export function getCacheSize() {
    const { size } = signatureCache;
    return size;
 }
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -257,3 +257,225 @@ export function reorderAssistantContent(content) {
    return reordered;
 }
 // ============================================================================
 // Thinking Recovery Functions
 // ============================================================================
 /**
 * Report whether a message carries at least one signed thinking block.
 * A block counts only when isThinkingPart accepts it and its `signature`
 * (Anthropic style) or `thoughtSignature` (Gemini style) has a length of at
 * least MIN_SIGNATURE_LENGTH. Unsigned thinking blocks are ignored.
 *
 * @param {Object} message - Message with a `content` or `parts` array
 * @returns {boolean} True if the message has a validly signed thinking block;
 *   false when the content is not an array
 */
 function messageHasValidThinking(message) {
    const blocks = message.content || message.parts || [];
    if (!Array.isArray(blocks)) {
        return false;
    }
    const isUsableSignature = (value) => Boolean(value) && value.length >= MIN_SIGNATURE_LENGTH;
    return blocks.some((block) =>
        isThinkingPart(block)
        && (isUsableSignature(block.signature) || isUsableSignature(block.thoughtSignature))
    );
 }
 /**
 * Report whether a message contains a tool call.
 * Recognises an Anthropic block of type `tool_use` or any block with a
 * truthy `functionCall` field (Google parts format).
 * @param {Object} message - Message with a `content` or `parts` array
 * @returns {boolean} True if the message has tool_use blocks; false when the
 *   content is not an array
 */
 function messageHasToolUse(message) {
    const blocks = message.content || message.parts || [];
    const isToolCall = (block) => {
        if (block.type === 'tool_use') return true;
        return Boolean(block.functionCall);
    };
    return Array.isArray(blocks) ? blocks.some(isToolCall) : false;
 }
 /**
 * Report whether a message contains a tool result.
 * Recognises an Anthropic block of type `tool_result` or any block with a
 * truthy `functionResponse` field (Google parts format).
 * @param {Object} message - Message with a `content` or `parts` array
 * @returns {boolean} True if the message has tool_result blocks; false when
 *   the content is not an array
 */
 function messageHasToolResult(message) {
    const blocks = message.content || message.parts || [];
    const isToolOutcome = (block) => {
        if (block.type === 'tool_result') return true;
        return Boolean(block.functionResponse);
    };
    return Array.isArray(blocks) ? blocks.some(isToolOutcome) : false;
 }
 /**
 * Decide whether a message is ordinary user input rather than a tool result.
 * Only `role: 'user'` messages qualify. Non-array content qualifies when it
 * is a string; array content qualifies when none of its blocks is a
 * `tool_result` or carries a truthy `functionResponse`.
 * @param {Object} message - Message to check
 * @returns {boolean} True if message is plain user text
 */
 function isPlainUserMessage(message) {
    if (message.role !== 'user') {
        return false;
    }
    const blocks = message.content || message.parts || [];
    if (Array.isArray(blocks)) {
        // Plain means every block is something other than a tool result
        return blocks.every((block) => block.type !== 'tool_result' && !block.functionResponse);
    }
    return typeof blocks === 'string';
 }
 /**
 * Analyze conversation state to detect if we're in a corrupted state.
 * Only the last assistant/model message and the messages after it are examined:
 * 1. Tool loop: assistant has tool_use followed by tool_results (normal flow)
 * 2. Interrupted tool: assistant has tool_use followed by plain user message (interrupted)
 *
 * The returned object always has the same five keys. When `messages` is not a
 * non-empty array, or contains no assistant/model message, every flag is
 * false, `toolResultCount` is 0 and `lastAssistantIdx` is -1.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {{inToolLoop: boolean, interruptedTool: boolean, turnHasThinking: boolean,
 *   toolResultCount: number, lastAssistantIdx: number}} State object.
 *   `toolResultCount` counts messages (not individual blocks) after the last
 *   assistant message that contain a tool result.
 */
 export function analyzeConversationState(messages) {
    // Built per call so callers can never share (and mutate) one default object
    const idleState = () => ({
        inToolLoop: false,
        interruptedTool: false,
        turnHasThinking: false,
        toolResultCount: 0,
        lastAssistantIdx: -1
    });
    if (!Array.isArray(messages) || messages.length === 0) {
        return idleState();
    }
    // Find the last assistant message (Google format uses the 'model' role)
    let lastAssistantIdx = -1;
    for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i].role === 'assistant' || messages[i].role === 'model') {
            lastAssistantIdx = i;
            break;
        }
    }
    if (lastAssistantIdx === -1) {
        return idleState();
    }
    const lastAssistant = messages[lastAssistantIdx];
    const hasToolUse = messageHasToolUse(lastAssistant);
    const hasThinking = messageHasValidThinking(lastAssistant);
    // Classify everything that follows the assistant message
    let toolResultCount = 0;
    let hasPlainUserMessageAfter = false;
    for (let i = lastAssistantIdx + 1; i < messages.length; i++) {
        if (messageHasToolResult(messages[i])) {
            toolResultCount++;
        }
        if (isPlainUserMessage(messages[i])) {
            hasPlainUserMessageAfter = true;
        }
    }
    // We're in a tool loop if: assistant has tool_use AND there are tool_results after
    const inToolLoop = hasToolUse && toolResultCount > 0;
    // We have an interrupted tool if: assistant has tool_use, NO tool_results,
    // but there IS a plain user message after (user interrupted and sent new message)
    const interruptedTool = hasToolUse && toolResultCount === 0 && hasPlainUserMessageAfter;
    return {
        inToolLoop,
        interruptedTool,
        turnHasThinking: hasThinking,
        toolResultCount,
        lastAssistantIdx
    };
 }
 /**
 * Decide whether the conversation must go through thinking recovery.
 * Recovery is needed when analyzeConversationState reports either a tool
 * loop or an interrupted tool call and the last assistant turn has no valid
 * (signed) thinking block.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {boolean} True if thinking recovery is needed
 */
 export function needsThinkingRecovery(messages) {
    const { inToolLoop, interruptedTool, turnHasThinking } = analyzeConversationState(messages);
    const toolTurnOpen = inToolLoop || interruptedTool;
    return toolTurnOpen && !turnHasThinking;
 }
 /**
 * Remove every thinking block from every message.
 * Used before injecting synthetic messages for recovery.
 *
 * Messages whose `content`/`parts` is not an array are passed through as the
 * same object. Otherwise a shallow copy is returned with the blocks accepted
 * by isThinkingPart filtered out; if nothing is left, a single empty text
 * block is substituted so the message never ends up with an empty array.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {Array<Object>} New array of messages with all thinking blocks removed
 */
 function stripAllThinkingBlocks(messages) {
    const scrub = (msg) => {
        const blocks = msg.content || msg.parts;
        if (!Array.isArray(blocks)) {
            return msg;
        }
        const kept = blocks.filter((block) => !isThinkingPart(block));
        const hasRemainder = kept.length > 0;
        if (msg.content) {
            // Anthropic-style message: placeholder is a typed text block
            const content = hasRemainder ? kept : [{ type: 'text', text: '' }];
            return { ...msg, content };
        }
        // Reaching here means the array came from `parts` (Google-style message)
        const parts = hasRemainder ? kept : [{ text: '' }];
        return { ...msg, parts };
    };
    return messages.map((msg) => scrub(msg));
 }
 /**
 * Close an open tool turn by injecting synthetic messages.
 * This allows the model to start a fresh turn when thinking is corrupted.
 *
 * The conversation is classified with analyzeConversationState:
 * - neither tool loop nor interrupted tool: `messages` is returned as-is;
 * - interrupted tool: all thinking blocks are stripped and a synthetic
 *   assistant message is inserted directly after the last assistant message;
 * - tool loop: all thinking blocks are stripped, then a synthetic assistant
 *   message summarising the tool results and a synthetic user "[Continue]"
 *   message are appended at the end.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {Array<Object>} The original array when no recovery applies,
 *   otherwise a new array with the synthetic messages injected
 */
 export function closeToolLoopForThinking(messages) {
    const {
        inToolLoop,
        interruptedTool,
        lastAssistantIdx,
        toolResultCount
    } = analyzeConversationState(messages);
    if (!(inToolLoop || interruptedTool)) {
        return messages;
    }
    // Work on the stripped copy so the caller's array is never spliced or pushed to
    const recovered = stripAllThinkingBlocks(messages);
    if (interruptedTool) {
        // Acknowledge the interruption right after the assistant's tool call,
        // i.e. ahead of the user's new message
        recovered.splice(lastAssistantIdx + 1, 0, {
            role: 'assistant',
            content: [{ type: 'text', text: '[Tool call was interrupted.]' }]
        });
        console.log('[ThinkingUtils] Applied thinking recovery for interrupted tool');
        return recovered;
    }
    let syntheticText;
    if (toolResultCount === 1) {
        syntheticText = '[Tool execution completed.]';
    } else {
        syntheticText = `[${toolResultCount} tool executions completed.]`;
    }
    recovered.push(
        // Synthetic model message completes the tool turn
        { role: 'assistant', content: [{ type: 'text', text: syntheticText }] },
        // Synthetic user message opens a fresh turn
        { role: 'user', content: [{ type: 'text', text: '[Continue]' }] }
    );
    console.log('[ThinkingUtils] Applied thinking recovery for tool loop');
    return recovered;
 }