Implement Gemini signature caching and thinking recovery

- Add in-memory signature cache to restore thoughtSignatures stripped by Claude Code
- Implement thinking recovery logic to handle interrupted tool loops for Gemini
- Enhance schema sanitizer to preserve constraints and enums as description hints
- Update CLAUDE.md with new architecture details

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-28 14:34:03 +05:30
parent 1eb2329f7c
commit 426acc494a
9 changed files with 473 additions and 20 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -54,8 +54,9 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
  - `request-converter.js` - Anthropic → Google request conversion
  - `response-converter.js` - Google → Anthropic response conversion
  - `content-converter.js` - Message content and role conversion
-  - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility
-  - `thinking-utils.js` - Thinking block validation, filtering, and reordering
+  - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility (preserves constraints/enums as hints)
+  - `thinking-utils.js` - Thinking block validation, filtering, reordering, and recovery logic
+  - `signature-cache.js` - In-memory cache for Gemini thoughtSignatures
 - **src/account-manager.js**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown
 - **src/oauth.js**: Google OAuth implementation for adding accounts
 - **src/token-extractor.js**: Extracts tokens from local Antigravity app installation (legacy single-account mode)
@@ -94,8 +95,8 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
 **Model Family Handling:**
 - `getModelFamily(model)` returns `'claude'` or `'gemini'` based on model name
 - Claude models use `signature` field on thinking blocks
-- Gemini models use `thoughtSignature` field on functionCall parts
-- When Claude Code strips `thoughtSignature`, the proxy uses Google's `skip_thought_signature_validator` sentinel value
+- Gemini models use `thoughtSignature` field on functionCall parts (cached or sentinel value)
+- When Claude Code strips `thoughtSignature`, the proxy tries to restore from cache, then falls back to `skip_thought_signature_validator`

 **Error Handling:** Use custom error classes from `src/errors.js`:
 - `RateLimitError` - 429/RESOURCE_EXHAUSTED errors
--- a/src/cloudcode-client.js
+++ b/src/cloudcode-client.js
@@ -23,6 +23,7 @@ import {
    convertAnthropicToGoogle,
    convertGoogleToAnthropic
 } from './format/index.js';
+import { cacheSignature } from './format/signature-cache.js';
 import { formatDuration, sleep } from './utils/helpers.js';
 import { isRateLimitError, isAuthError } from './errors.js';

@@ -848,6 +849,8 @@ async function* streamSSEResponse(response, originalModel) {
                        // Store the signature in the tool_use block for later retrieval
                        if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) {
                            toolUseBlock.thoughtSignature = functionCallSignature;
+                            // Cache for future requests (Claude Code may strip this field)
+                            cacheSignature(toolId, functionCallSignature);
                        }

                        yield {
--- a/src/constants.js
+++ b/src/constants.js
@@ -87,6 +87,14 @@ export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature leng
 // Gemini-specific limits
 export const GEMINI_MAX_OUTPUT_TOKENS = 16384;

+// Gemini signature handling
+// Sentinel value to skip thought signature validation when Claude Code strips the field
+// See: https://ai.google.dev/gemini-api/docs/thought-signatures
+export const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
+
+// Cache TTL for Gemini thoughtSignatures (2 hours)
+export const GEMINI_SIGNATURE_CACHE_TTL_MS = 2 * 60 * 60 * 1000;
+
 /**
 * Get the model family from model name (dynamic detection, no hardcoded list).
 * @param {string} modelName - The model name from the request
@@ -152,6 +160,8 @@ export default {
    MAX_WAIT_BEFORE_ERROR_MS,
    MIN_SIGNATURE_LENGTH,
    GEMINI_MAX_OUTPUT_TOKENS,
+    GEMINI_SKIP_SIGNATURE,
+    GEMINI_SIGNATURE_CACHE_TTL_MS,
    getModelFamily,
    isThinkingModel,
    OAUTH_CONFIG,
--- a/src/format/content-converter.js
+++ b/src/format/content-converter.js
@@ -3,15 +3,8 @@
 * Converts Anthropic message content to Google Generative AI parts format
 */

-import { MIN_SIGNATURE_LENGTH } from '../constants.js';
-
-/**
- * Sentinel value to skip thought signature validation for Gemini models.
- * Per Google documentation, this value can be used when Claude Code strips
- * the thoughtSignature field from tool_use blocks in multi-turn requests.
- * See: https://ai.google.dev/gemini-api/docs/thought-signatures
- */
-const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
+import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
+import { getCachedSignature } from './signature-cache.js';

 /**
 * Convert Anthropic role to Google role
@@ -102,10 +95,17 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
            // For Gemini models, include thoughtSignature at the part level
            // This is required by Gemini 3+ for tool calls to work correctly
            if (isGeminiModel) {
-                // Use thoughtSignature from the block if Claude Code preserved it
-                // Otherwise, use the sentinel value to skip validation (Claude Code strips non-standard fields)
-                // See: https://ai.google.dev/gemini-api/docs/thought-signatures
-                part.thoughtSignature = block.thoughtSignature || GEMINI_SKIP_SIGNATURE;
+                // Priority: block.thoughtSignature > cache > GEMINI_SKIP_SIGNATURE
+                let signature = block.thoughtSignature;
+
+                if (!signature && block.id) {
+                    signature = getCachedSignature(block.id);
+                    if (signature) {
+                        console.log('[ContentConverter] Restored signature from cache for:', block.id);
+                    }
+                }
+
+                part.thoughtSignature = signature || GEMINI_SKIP_SIGNATURE;
            }

            parts.push(part);
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -14,7 +14,9 @@ import {
    restoreThinkingSignatures,
    removeTrailingThinkingBlocks,
    reorderAssistantContent,
-    filterUnsignedThinkingBlocks
+    filterUnsignedThinkingBlocks,
+    needsThinkingRecovery,
+    closeToolLoopForThinking
 } from './thinking-utils.js';

 /**
@@ -74,9 +76,18 @@ export function convertAnthropicToGoogle(anthropicRequest) {
        }
    }

+    // Apply thinking recovery for Gemini thinking models when needed
+    // This handles corrupted tool loops where thinking blocks are stripped
+    // Claude models handle this differently and don't need this recovery
+    let processedMessages = messages;
+    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
+        console.log('[RequestConverter] Applying thinking recovery for Gemini');
+        processedMessages = closeToolLoopForThinking(messages);
+    }
+
    // Convert messages to contents, then filter unsigned thinking blocks
-    for (let i = 0; i < messages.length; i++) {
-        const msg = messages[i];
+    for (let i = 0; i < processedMessages.length; i++) {
+        const msg = processedMessages[i];
        let msgContent = msg.content;

        // For assistant messages, process thinking blocks and reorder content
@@ -90,6 +101,14 @@ export function convertAnthropicToGoogle(anthropicRequest) {
        }

        const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel);
+
+        // SAFETY: Google API requires at least one part per content message
+        // This happens when all thinking blocks are filtered out (unsigned)
+        if (parts.length === 0) {
+            console.log('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
+            parts.push({ text: '' });
+        }
+
        const content = {
            role: convertRole(msg.role),
            parts: parts
--- a/src/format/response-converter.js
+++ b/src/format/response-converter.js
@@ -5,6 +5,7 @@

 import crypto from 'crypto';
 import { MIN_SIGNATURE_LENGTH } from '../constants.js';
+import { cacheSignature } from './signature-cache.js';

 /**
 * Convert Google Generative AI response to Anthropic Messages API format
@@ -58,6 +59,8 @@ export function convertGoogleToAnthropic(googleResponse, model) {
            // For Gemini 3+, include thoughtSignature from the part level
            if (part.thoughtSignature && part.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) {
                toolUseBlock.thoughtSignature = part.thoughtSignature;
+                // Cache for future requests (Claude Code may strip this field)
+                cacheSignature(toolId, part.thoughtSignature);
            }

            anthropicContent.push(toolUseBlock);
--- a/src/format/schema-sanitizer.js
+++ b/src/format/schema-sanitizer.js
@@ -289,6 +289,127 @@ function flattenAnyOfOneOf(schema) {
    return result;
 }

+// ============================================================================
+// Enhanced Schema Hints (for preserving semantic information)
+// ============================================================================
+
+/**
+ * Add hints for enum values (if ≤10 values).
+ * This preserves enum information in the description since Gemini
+ * may not fully support enums in all cases.
+ *
+ * @param {Object} schema - Schema to process
+ * @returns {Object} Schema with enum hints added to description
+ */
+function addEnumHints(schema) {
+    if (!schema || typeof schema !== 'object') return schema;
+    if (Array.isArray(schema)) return schema.map(addEnumHints);
+
+    let result = { ...schema };
+
+    // Add enum hint if present and reasonable size
+    if (Array.isArray(result.enum) && result.enum.length > 1 && result.enum.length <= 10) {
+        const vals = result.enum.map(v => String(v)).join(', ');
+        result = appendDescriptionHint(result, `Allowed: ${vals}`);
+    }
+
+    // Recursively process properties
+    if (result.properties && typeof result.properties === 'object') {
+        const newProps = {};
+        for (const [key, value] of Object.entries(result.properties)) {
+            newProps[key] = addEnumHints(value);
+        }
+        result.properties = newProps;
+    }
+
+    // Recursively process items
+    if (result.items) {
+        result.items = Array.isArray(result.items)
+            ? result.items.map(addEnumHints)
+            : addEnumHints(result.items);
+    }
+
+    return result;
+}
+
+/**
+ * Add hints for additionalProperties: false.
+ * This informs the model that extra properties are not allowed.
+ *
+ * @param {Object} schema - Schema to process
+ * @returns {Object} Schema with additionalProperties hints added
+ */
+function addAdditionalPropertiesHints(schema) {
+    if (!schema || typeof schema !== 'object') return schema;
+    if (Array.isArray(schema)) return schema.map(addAdditionalPropertiesHints);
+
+    let result = { ...schema };
+
+    if (result.additionalProperties === false) {
+        result = appendDescriptionHint(result, 'No extra properties allowed');
+    }
+
+    // Recursively process properties
+    if (result.properties && typeof result.properties === 'object') {
+        const newProps = {};
+        for (const [key, value] of Object.entries(result.properties)) {
+            newProps[key] = addAdditionalPropertiesHints(value);
+        }
+        result.properties = newProps;
+    }
+
+    // Recursively process items
+    if (result.items) {
+        result.items = Array.isArray(result.items)
+            ? result.items.map(addAdditionalPropertiesHints)
+            : addAdditionalPropertiesHints(result.items);
+    }
+
+    return result;
+}
+
+/**
+ * Move unsupported constraints to description hints.
+ * This preserves constraint information that would otherwise be lost
+ * when we strip unsupported keywords.
+ *
+ * @param {Object} schema - Schema to process
+ * @returns {Object} Schema with constraint hints added to description
+ */
+function moveConstraintsToDescription(schema) {
+    if (!schema || typeof schema !== 'object') return schema;
+    if (Array.isArray(schema)) return schema.map(moveConstraintsToDescription);
+
+    const CONSTRAINTS = ['minLength', 'maxLength', 'pattern', 'minimum', 'maximum',
+                         'minItems', 'maxItems', 'format'];
+
+    let result = { ...schema };
+
+    for (const constraint of CONSTRAINTS) {
+        if (result[constraint] !== undefined && typeof result[constraint] !== 'object') {
+            result = appendDescriptionHint(result, `${constraint}: ${result[constraint]}`);
+        }
+    }
+
+    // Recursively process properties
+    if (result.properties && typeof result.properties === 'object') {
+        const newProps = {};
+        for (const [key, value] of Object.entries(result.properties)) {
+            newProps[key] = moveConstraintsToDescription(value);
+        }
+        result.properties = newProps;
+    }
+
+    // Recursively process items
+    if (result.items) {
+        result.items = Array.isArray(result.items)
+            ? result.items.map(moveConstraintsToDescription)
+            : moveConstraintsToDescription(result.items);
+    }
+
+    return result;
+}
+
 /**
 * Flatten array type fields and track nullable properties.
 * Converts { type: ["string", "null"] } to { type: "string" } with nullable hint.
@@ -457,6 +578,15 @@ export function cleanSchemaForGemini(schema) {
    // Phase 1: Convert $refs to hints
    let result = convertRefsToHints(schema);

+    // Phase 1b: Add enum hints (preserves enum info in description)
+    result = addEnumHints(result);
+
+    // Phase 1c: Add additionalProperties hints
+    result = addAdditionalPropertiesHints(result);
+
+    // Phase 1d: Move constraints to description (before they get stripped)
+    result = moveConstraintsToDescription(result);
+
    // Phase 2a: Merge allOf schemas
    result = mergeAllOf(result);

--- a/src/format/signature-cache.js
+++ b/src/format/signature-cache.js
@@ -0,0 +1,65 @@
+/**
+ * Signature Cache
+ * In-memory cache for Gemini thoughtSignatures
+ *
+ * Gemini models require thoughtSignature on tool calls, but Claude Code
+ * strips non-standard fields. This cache stores signatures by tool_use_id
+ * so they can be restored in subsequent requests.
+ */
+
+import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
+
+const signatureCache = new Map();
+
+/**
+ * Store a signature for a tool_use_id
+ * @param {string} toolUseId - The tool use ID
+ * @param {string} signature - The thoughtSignature to cache
+ */
+export function cacheSignature(toolUseId, signature) {
+    if (!toolUseId || !signature) return;
+    signatureCache.set(toolUseId, {
+        signature,
+        timestamp: Date.now()
+    });
+}
+
+/**
+ * Get a cached signature for a tool_use_id
+ * @param {string} toolUseId - The tool use ID
+ * @returns {string|null} The cached signature or null if not found/expired
+ */
+export function getCachedSignature(toolUseId) {
+    if (!toolUseId) return null;
+    const entry = signatureCache.get(toolUseId);
+    if (!entry) return null;
+
+    // Check TTL
+    if (Date.now() - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
+        signatureCache.delete(toolUseId);
+        return null;
+    }
+
+    return entry.signature;
+}
+
+/**
+ * Clear expired entries from the cache
+ * Can be called periodically to prevent memory buildup
+ */
+export function cleanupCache() {
+    const now = Date.now();
+    for (const [key, entry] of signatureCache) {
+        if (now - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
+            signatureCache.delete(key);
+        }
+    }
+}
+
+/**
+ * Get the current cache size (for debugging)
+ * @returns {number} Number of entries in the cache
+ */
+export function getCacheSize() {
+    return signatureCache.size;
+}
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -257,3 +257,225 @@ export function reorderAssistantContent(content) {

    return reordered;
 }
+
+// ============================================================================
+// Thinking Recovery Functions
+// ============================================================================
+
+/**
+ * Check if a message has any VALID (signed) thinking blocks.
+ * Only counts thinking blocks that have valid signatures, not unsigned ones
+ * that will be dropped later.
+ *
+ * @param {Object} message - Message to check
+ * @returns {boolean} True if message has valid signed thinking blocks
+ */
+function messageHasValidThinking(message) {
+    const content = message.content || message.parts || [];
+    if (!Array.isArray(content)) return false;
+    return content.some(block => {
+        if (!isThinkingPart(block)) return false;
+        // Check for valid signature (Anthropic style)
+        if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) return true;
+        // Check for thoughtSignature (Gemini style on functionCall)
+        if (block.thoughtSignature && block.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) return true;
+        return false;
+    });
+}
+
+/**
+ * Check if a message has tool_use blocks
+ * @param {Object} message - Message to check
+ * @returns {boolean} True if message has tool_use blocks
+ */
+function messageHasToolUse(message) {
+    const content = message.content || message.parts || [];
+    if (!Array.isArray(content)) return false;
+    return content.some(block =>
+        block.type === 'tool_use' || block.functionCall
+    );
+}
+
+/**
+ * Check if a message has tool_result blocks
+ * @param {Object} message - Message to check
+ * @returns {boolean} True if message has tool_result blocks
+ */
+function messageHasToolResult(message) {
+    const content = message.content || message.parts || [];
+    if (!Array.isArray(content)) return false;
+    return content.some(block =>
+        block.type === 'tool_result' || block.functionResponse
+    );
+}
+
+/**
+ * Check if message is a plain user text message (not tool_result)
+ * @param {Object} message - Message to check
+ * @returns {boolean} True if message is plain user text
+ */
+function isPlainUserMessage(message) {
+    if (message.role !== 'user') return false;
+    const content = message.content || message.parts || [];
+    if (!Array.isArray(content)) return typeof content === 'string';
+    // Check if it has tool_result blocks
+    return !content.some(block =>
+        block.type === 'tool_result' || block.functionResponse
+    );
+}
+
+/**
+ * Analyze conversation state to detect if we're in a corrupted state.
+ * This includes:
+ * 1. Tool loop: assistant has tool_use followed by tool_results (normal flow)
+ * 2. Interrupted tool: assistant has tool_use followed by plain user message (interrupted)
+ *
+ * @param {Array<Object>} messages - Array of messages
+ * @returns {Object} State object with inToolLoop, interruptedTool, turnHasThinking, etc.
+ */
+export function analyzeConversationState(messages) {
+    if (!Array.isArray(messages) || messages.length === 0) {
+        return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 };
+    }
+
+    // Find the last assistant message
+    let lastAssistantIdx = -1;
+    for (let i = messages.length - 1; i >= 0; i--) {
+        if (messages[i].role === 'assistant' || messages[i].role === 'model') {
+            lastAssistantIdx = i;
+            break;
+        }
+    }
+
+    if (lastAssistantIdx === -1) {
+        return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 };
+    }
+
+    const lastAssistant = messages[lastAssistantIdx];
+    const hasToolUse = messageHasToolUse(lastAssistant);
+    const hasThinking = messageHasValidThinking(lastAssistant);
+
+    // Count trailing tool results after the assistant message
+    let toolResultCount = 0;
+    let hasPlainUserMessageAfter = false;
+    for (let i = lastAssistantIdx + 1; i < messages.length; i++) {
+        if (messageHasToolResult(messages[i])) {
+            toolResultCount++;
+        }
+        if (isPlainUserMessage(messages[i])) {
+            hasPlainUserMessageAfter = true;
+        }
+    }
+
+    // We're in a tool loop if: assistant has tool_use AND there are tool_results after
+    const inToolLoop = hasToolUse && toolResultCount > 0;
+
+    // We have an interrupted tool if: assistant has tool_use, NO tool_results,
+    // but there IS a plain user message after (user interrupted and sent new message)
+    const interruptedTool = hasToolUse && toolResultCount === 0 && hasPlainUserMessageAfter;
+
+    return {
+        inToolLoop,
+        interruptedTool,
+        turnHasThinking: hasThinking,
+        toolResultCount,
+        lastAssistantIdx
+    };
+}
+
+/**
+ * Check if conversation needs thinking recovery.
+ * Returns true when:
+ * 1. We're in a tool loop but have no valid thinking blocks, OR
+ * 2. We have an interrupted tool with no valid thinking blocks
+ *
+ * @param {Array<Object>} messages - Array of messages
+ * @returns {boolean} True if thinking recovery is needed
+ */
+export function needsThinkingRecovery(messages) {
+    const state = analyzeConversationState(messages);
+    // Need recovery if (tool loop OR interrupted tool) AND no thinking
+    return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking;
+}
+
+/**
+ * Strip all thinking blocks from messages.
+ * Used before injecting synthetic messages for recovery.
+ *
+ * @param {Array<Object>} messages - Array of messages
+ * @returns {Array<Object>} Messages with all thinking blocks removed
+ */
+function stripAllThinkingBlocks(messages) {
+    return messages.map(msg => {
+        const content = msg.content || msg.parts;
+        if (!Array.isArray(content)) return msg;
+
+        const filtered = content.filter(block => !isThinkingPart(block));
+
+        if (msg.content) {
+            return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '' }] };
+        } else if (msg.parts) {
+            return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '' }] };
+        }
+        return msg;
+    });
+}
+
+/**
+ * Close tool loop by injecting synthetic messages.
+ * This allows the model to start a fresh turn when thinking is corrupted.
+ *
+ * When thinking blocks are stripped (no valid signatures) and we're in the
+ * middle of a tool loop OR have an interrupted tool, the conversation is in
+ * a corrupted state. This function injects synthetic messages to close the
+ * loop and allow the model to continue.
+ *
+ * @param {Array<Object>} messages - Array of messages
+ * @returns {Array<Object>} Modified messages with synthetic messages injected
+ */
+export function closeToolLoopForThinking(messages) {
+    const state = analyzeConversationState(messages);
+
+    // Handle neither tool loop nor interrupted tool
+    if (!state.inToolLoop && !state.interruptedTool) return messages;
+
+    // Strip all thinking blocks
+    let modified = stripAllThinkingBlocks(messages);
+
+    if (state.interruptedTool) {
+        // For interrupted tools: just strip thinking and add a synthetic assistant message
+        // to acknowledge the interruption before the user's new message
+
+        // Find where to insert the synthetic message (before the plain user message)
+        const insertIdx = state.lastAssistantIdx + 1;
+
+        // Insert synthetic assistant message acknowledging interruption
+        modified.splice(insertIdx, 0, {
+            role: 'assistant',
+            content: [{ type: 'text', text: '[Tool call was interrupted.]' }]
+        });
+
+        console.log('[ThinkingUtils] Applied thinking recovery for interrupted tool');
+    } else {
+        // For tool loops: add synthetic messages to close the loop
+        const syntheticText = state.toolResultCount === 1
+            ? '[Tool execution completed.]'
+            : `[${state.toolResultCount} tool executions completed.]`;
+
+        // Inject synthetic model message to complete the turn
+        modified.push({
+            role: 'assistant',
+            content: [{ type: 'text', text: syntheticText }]
+        });
+
+        // Inject synthetic user message to start fresh
+        modified.push({
+            role: 'user',
+            content: [{ type: 'text', text: '[Continue]' }]
+        });
+
+        console.log('[ThinkingUtils] Applied thinking recovery for tool loop');
+    }
+
+    return modified;
+}