Implement Gemini signature caching and thinking recovery

- Add in-memory signature cache to restore thoughtSignatures stripped by Claude Code
- Implement thinking recovery logic to handle interrupted tool loops for Gemini
- Enhance schema sanitizer to preserve constraints and enums as description hints
- Update CLAUDE.md with new architecture details

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2025-12-28 14:34:03 +05:30
parent 1eb2329f7c
commit 426acc494a
9 changed files with 473 additions and 20 deletions

View File

@@ -54,8 +54,9 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
- `request-converter.js` - Anthropic → Google request conversion - `request-converter.js` - Anthropic → Google request conversion
- `response-converter.js` - Google → Anthropic response conversion - `response-converter.js` - Google → Anthropic response conversion
- `content-converter.js` - Message content and role conversion - `content-converter.js` - Message content and role conversion
- `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility - `schema-sanitizer.js` - JSON Schema cleaning for Gemini API compatibility (preserves constraints/enums as hints)
- `thinking-utils.js` - Thinking block validation, filtering, and reordering - `thinking-utils.js` - Thinking block validation, filtering, reordering, and recovery logic
- `signature-cache.js` - In-memory cache for Gemini thoughtSignatures
- **src/account-manager.js**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown - **src/account-manager.js**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown
- **src/oauth.js**: Google OAuth implementation for adding accounts - **src/oauth.js**: Google OAuth implementation for adding accounts
- **src/token-extractor.js**: Extracts tokens from local Antigravity app installation (legacy single-account mode) - **src/token-extractor.js**: Extracts tokens from local Antigravity app installation (legacy single-account mode)
@@ -94,8 +95,8 @@ Claude Code CLI → Express Server (server.js) → CloudCode Client → Antigrav
**Model Family Handling:** **Model Family Handling:**
- `getModelFamily(model)` returns `'claude'` or `'gemini'` based on model name - `getModelFamily(model)` returns `'claude'` or `'gemini'` based on model name
- Claude models use `signature` field on thinking blocks - Claude models use `signature` field on thinking blocks
- Gemini models use `thoughtSignature` field on functionCall parts - Gemini models use `thoughtSignature` field on functionCall parts (cached or sentinel value)
- When Claude Code strips `thoughtSignature`, the proxy uses Google's `skip_thought_signature_validator` sentinel value - When Claude Code strips `thoughtSignature`, the proxy tries to restore from cache, then falls back to `skip_thought_signature_validator`
**Error Handling:** Use custom error classes from `src/errors.js`: **Error Handling:** Use custom error classes from `src/errors.js`:
- `RateLimitError` - 429/RESOURCE_EXHAUSTED errors - `RateLimitError` - 429/RESOURCE_EXHAUSTED errors

View File

@@ -23,6 +23,7 @@ import {
convertAnthropicToGoogle, convertAnthropicToGoogle,
convertGoogleToAnthropic convertGoogleToAnthropic
} from './format/index.js'; } from './format/index.js';
import { cacheSignature } from './format/signature-cache.js';
import { formatDuration, sleep } from './utils/helpers.js'; import { formatDuration, sleep } from './utils/helpers.js';
import { isRateLimitError, isAuthError } from './errors.js'; import { isRateLimitError, isAuthError } from './errors.js';
@@ -848,6 +849,8 @@ async function* streamSSEResponse(response, originalModel) {
// Store the signature in the tool_use block for later retrieval // Store the signature in the tool_use block for later retrieval
if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) { if (functionCallSignature && functionCallSignature.length >= MIN_SIGNATURE_LENGTH) {
toolUseBlock.thoughtSignature = functionCallSignature; toolUseBlock.thoughtSignature = functionCallSignature;
// Cache for future requests (Claude Code may strip this field)
cacheSignature(toolId, functionCallSignature);
} }
yield { yield {

View File

@@ -87,6 +87,14 @@ export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature leng
// Gemini-specific limits // Gemini-specific limits
export const GEMINI_MAX_OUTPUT_TOKENS = 16384; export const GEMINI_MAX_OUTPUT_TOKENS = 16384;
// Gemini signature handling
// Sentinel value to skip thought signature validation when Claude Code strips the field
// See: https://ai.google.dev/gemini-api/docs/thought-signatures
export const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
// Cache TTL for Gemini thoughtSignatures (2 hours)
export const GEMINI_SIGNATURE_CACHE_TTL_MS = 2 * 60 * 60 * 1000;
/** /**
* Get the model family from model name (dynamic detection, no hardcoded list). * Get the model family from model name (dynamic detection, no hardcoded list).
* @param {string} modelName - The model name from the request * @param {string} modelName - The model name from the request
@@ -152,6 +160,8 @@ export default {
MAX_WAIT_BEFORE_ERROR_MS, MAX_WAIT_BEFORE_ERROR_MS,
MIN_SIGNATURE_LENGTH, MIN_SIGNATURE_LENGTH,
GEMINI_MAX_OUTPUT_TOKENS, GEMINI_MAX_OUTPUT_TOKENS,
GEMINI_SKIP_SIGNATURE,
GEMINI_SIGNATURE_CACHE_TTL_MS,
getModelFamily, getModelFamily,
isThinkingModel, isThinkingModel,
OAUTH_CONFIG, OAUTH_CONFIG,

View File

@@ -3,15 +3,8 @@
* Converts Anthropic message content to Google Generative AI parts format * Converts Anthropic message content to Google Generative AI parts format
*/ */
import { MIN_SIGNATURE_LENGTH } from '../constants.js'; import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
import { getCachedSignature } from './signature-cache.js';
/**
* Sentinel value to skip thought signature validation for Gemini models.
* Per Google documentation, this value can be used when Claude Code strips
* the thoughtSignature field from tool_use blocks in multi-turn requests.
* See: https://ai.google.dev/gemini-api/docs/thought-signatures
*/
const GEMINI_SKIP_SIGNATURE = 'skip_thought_signature_validator';
/** /**
* Convert Anthropic role to Google role * Convert Anthropic role to Google role
@@ -102,10 +95,17 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
// For Gemini models, include thoughtSignature at the part level // For Gemini models, include thoughtSignature at the part level
// This is required by Gemini 3+ for tool calls to work correctly // This is required by Gemini 3+ for tool calls to work correctly
if (isGeminiModel) { if (isGeminiModel) {
// Use thoughtSignature from the block if Claude Code preserved it // Priority: block.thoughtSignature > cache > GEMINI_SKIP_SIGNATURE
// Otherwise, use the sentinel value to skip validation (Claude Code strips non-standard fields) let signature = block.thoughtSignature;
// See: https://ai.google.dev/gemini-api/docs/thought-signatures
part.thoughtSignature = block.thoughtSignature || GEMINI_SKIP_SIGNATURE; if (!signature && block.id) {
signature = getCachedSignature(block.id);
if (signature) {
console.log('[ContentConverter] Restored signature from cache for:', block.id);
}
}
part.thoughtSignature = signature || GEMINI_SKIP_SIGNATURE;
} }
parts.push(part); parts.push(part);

View File

@@ -14,7 +14,9 @@ import {
restoreThinkingSignatures, restoreThinkingSignatures,
removeTrailingThinkingBlocks, removeTrailingThinkingBlocks,
reorderAssistantContent, reorderAssistantContent,
filterUnsignedThinkingBlocks filterUnsignedThinkingBlocks,
needsThinkingRecovery,
closeToolLoopForThinking
} from './thinking-utils.js'; } from './thinking-utils.js';
/** /**
@@ -74,9 +76,18 @@ export function convertAnthropicToGoogle(anthropicRequest) {
} }
} }
// Apply thinking recovery for Gemini thinking models when needed
// This handles corrupted tool loops where thinking blocks are stripped
// Claude models handle this differently and don't need this recovery
let processedMessages = messages;
if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
console.log('[RequestConverter] Applying thinking recovery for Gemini');
processedMessages = closeToolLoopForThinking(messages);
}
// Convert messages to contents, then filter unsigned thinking blocks // Convert messages to contents, then filter unsigned thinking blocks
for (let i = 0; i < messages.length; i++) { for (let i = 0; i < processedMessages.length; i++) {
const msg = messages[i]; const msg = processedMessages[i];
let msgContent = msg.content; let msgContent = msg.content;
// For assistant messages, process thinking blocks and reorder content // For assistant messages, process thinking blocks and reorder content
@@ -90,6 +101,14 @@ export function convertAnthropicToGoogle(anthropicRequest) {
} }
const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel); const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel);
// SAFETY: Google API requires at least one part per content message
// This happens when all thinking blocks are filtered out (unsigned)
if (parts.length === 0) {
console.log('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
parts.push({ text: '' });
}
const content = { const content = {
role: convertRole(msg.role), role: convertRole(msg.role),
parts: parts parts: parts

View File

@@ -5,6 +5,7 @@
import crypto from 'crypto'; import crypto from 'crypto';
import { MIN_SIGNATURE_LENGTH } from '../constants.js'; import { MIN_SIGNATURE_LENGTH } from '../constants.js';
import { cacheSignature } from './signature-cache.js';
/** /**
* Convert Google Generative AI response to Anthropic Messages API format * Convert Google Generative AI response to Anthropic Messages API format
@@ -58,6 +59,8 @@ export function convertGoogleToAnthropic(googleResponse, model) {
// For Gemini 3+, include thoughtSignature from the part level // For Gemini 3+, include thoughtSignature from the part level
if (part.thoughtSignature && part.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) { if (part.thoughtSignature && part.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) {
toolUseBlock.thoughtSignature = part.thoughtSignature; toolUseBlock.thoughtSignature = part.thoughtSignature;
// Cache for future requests (Claude Code may strip this field)
cacheSignature(toolId, part.thoughtSignature);
} }
anthropicContent.push(toolUseBlock); anthropicContent.push(toolUseBlock);

View File

@@ -289,6 +289,127 @@ function flattenAnyOfOneOf(schema) {
return result; return result;
} }
// ============================================================================
// Enhanced Schema Hints (for preserving semantic information)
// ============================================================================
/**
 * Copy small enum value lists into the schema description as an "Allowed: ..." hint.
 *
 * Only enums with 2 to 10 values are hinted. The schema's own `enum` keyword is
 * left in place; the hint is an addition, kept because (per the original note)
 * Gemini may not fully honour enums in every case.
 * The walk descends into `properties` and `items` (single schema or tuple form).
 *
 * NOTE(review): `appendDescriptionHint` is defined elsewhere in this file; it is
 * assumed to return the schema with the hint added to its description.
 *
 * @param {Object} schema - Schema node (objects and arrays are walked, anything else is returned as-is)
 * @returns {Object} Shallow-copied schema with enum hints applied at every visited level
 */
function addEnumHints(schema) {
    if (Array.isArray(schema)) {
        return schema.map(addEnumHints);
    }
    if (!schema || typeof schema !== 'object') {
        return schema;
    }

    let annotated = { ...schema };

    // Hint only when the value list is short enough to stay readable.
    const choices = annotated.enum;
    const isHintable = Array.isArray(choices) && choices.length > 1 && choices.length <= 10;
    if (isHintable) {
        const listed = choices.map((choice) => String(choice)).join(', ');
        annotated = appendDescriptionHint(annotated, `Allowed: ${listed}`);
    }

    // Descend into named properties.
    const props = annotated.properties;
    if (props && typeof props === 'object') {
        const rewritten = {};
        for (const [propName, propSchema] of Object.entries(props)) {
            rewritten[propName] = addEnumHints(propSchema);
        }
        annotated.properties = rewritten;
    }

    // Descend into array item schemas (tuple form or single schema).
    const itemSchema = annotated.items;
    if (itemSchema) {
        if (Array.isArray(itemSchema)) {
            annotated.items = itemSchema.map(addEnumHints);
        } else {
            annotated.items = addEnumHints(itemSchema);
        }
    }

    return annotated;
}
/**
 * Record `additionalProperties: false` as a description hint.
 *
 * Wherever a visited schema level sets `additionalProperties` to exactly `false`,
 * the text "No extra properties allowed" is passed to `appendDescriptionHint`.
 * The walk descends into `properties` and `items` (single schema or tuple form).
 *
 * NOTE(review): `appendDescriptionHint` is defined elsewhere in this file; it is
 * assumed to return the schema with the hint added to its description.
 *
 * @param {Object} schema - Schema node (objects and arrays are walked, anything else is returned as-is)
 * @returns {Object} Shallow-copied schema with the hints applied at every visited level
 */
function addAdditionalPropertiesHints(schema) {
    if (Array.isArray(schema)) {
        return schema.map(addAdditionalPropertiesHints);
    }
    if (!schema || typeof schema !== 'object') {
        return schema;
    }

    const copy = { ...schema };
    const annotated = copy.additionalProperties === false
        ? appendDescriptionHint(copy, 'No extra properties allowed')
        : copy;

    // Descend into named properties.
    const props = annotated.properties;
    if (props && typeof props === 'object') {
        const rewritten = {};
        for (const [propName, propSchema] of Object.entries(props)) {
            rewritten[propName] = addAdditionalPropertiesHints(propSchema);
        }
        annotated.properties = rewritten;
    }

    // Descend into array item schemas (tuple form or single schema).
    const itemSchema = annotated.items;
    if (itemSchema) {
        if (Array.isArray(itemSchema)) {
            annotated.items = itemSchema.map(addAdditionalPropertiesHints);
        } else {
            annotated.items = addAdditionalPropertiesHints(itemSchema);
        }
    }

    return annotated;
}
/**
 * Mirror validation constraints into the description as "keyword: value" hints.
 *
 * For each of minLength, maxLength, pattern, minimum, maximum, minItems, maxItems
 * and format, a hint is appended when the keyword is present with a non-object
 * value. This function does not delete the keywords themselves; it only records
 * them so the information remains after a later stage strips them.
 * The walk descends into `properties` and `items` (single schema or tuple form).
 *
 * NOTE(review): `appendDescriptionHint` is defined elsewhere in this file; it is
 * assumed to return the schema with the hint added to its description.
 *
 * @param {Object} schema - Schema node (objects and arrays are walked, anything else is returned as-is)
 * @returns {Object} Shallow-copied schema with constraint hints applied at every visited level
 */
function moveConstraintsToDescription(schema) {
    if (Array.isArray(schema)) {
        return schema.map(moveConstraintsToDescription);
    }
    if (!schema || typeof schema !== 'object') {
        return schema;
    }

    const HINTED_KEYWORDS = [
        'minLength', 'maxLength', 'pattern', 'minimum', 'maximum',
        'minItems', 'maxItems', 'format'
    ];

    let annotated = { ...schema };

    for (const keyword of HINTED_KEYWORDS) {
        const value = annotated[keyword];
        // Absent keywords and object-valued ones (including null) produce no hint.
        if (value === undefined || typeof value === 'object') {
            continue;
        }
        annotated = appendDescriptionHint(annotated, `${keyword}: ${value}`);
    }

    // Descend into named properties.
    const props = annotated.properties;
    if (props && typeof props === 'object') {
        const rewritten = {};
        for (const [propName, propSchema] of Object.entries(props)) {
            rewritten[propName] = moveConstraintsToDescription(propSchema);
        }
        annotated.properties = rewritten;
    }

    // Descend into array item schemas (tuple form or single schema).
    const itemSchema = annotated.items;
    if (itemSchema) {
        if (Array.isArray(itemSchema)) {
            annotated.items = itemSchema.map(moveConstraintsToDescription);
        } else {
            annotated.items = moveConstraintsToDescription(itemSchema);
        }
    }

    return annotated;
}
/** /**
* Flatten array type fields and track nullable properties. * Flatten array type fields and track nullable properties.
* Converts { type: ["string", "null"] } to { type: "string" } with nullable hint. * Converts { type: ["string", "null"] } to { type: "string" } with nullable hint.
@@ -457,6 +578,15 @@ export function cleanSchemaForGemini(schema) {
// Phase 1: Convert $refs to hints // Phase 1: Convert $refs to hints
let result = convertRefsToHints(schema); let result = convertRefsToHints(schema);
// Phase 1b: Add enum hints (preserves enum info in description)
result = addEnumHints(result);
// Phase 1c: Add additionalProperties hints
result = addAdditionalPropertiesHints(result);
// Phase 1d: Move constraints to description (before they get stripped)
result = moveConstraintsToDescription(result);
// Phase 2a: Merge allOf schemas // Phase 2a: Merge allOf schemas
result = mergeAllOf(result); result = mergeAllOf(result);

View File

@@ -0,0 +1,65 @@
/**
* Signature Cache
* In-memory cache for Gemini thoughtSignatures
*
* Gemini models require thoughtSignature on tool calls, but Claude Code
* strips non-standard fields. This cache stores signatures by tool_use_id
* so they can be restored in subsequent requests.
*/
import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
const signatureCache = new Map();
/**
 * Remember the thoughtSignature that belongs to a tool_use_id.
 *
 * The entry is stamped with the current time so lookups can apply the TTL.
 * Calls with a falsy id or falsy signature are ignored.
 *
 * @param {string} toolUseId - The tool use ID used as the cache key
 * @param {string} signature - The thoughtSignature to cache
 */
export function cacheSignature(toolUseId, signature) {
    if (toolUseId && signature) {
        const entry = { signature, timestamp: Date.now() };
        signatureCache.set(toolUseId, entry);
    }
}
/**
 * Look up the cached thoughtSignature for a tool_use_id.
 *
 * An entry older than GEMINI_SIGNATURE_CACHE_TTL_MS is evicted on access and
 * reported as a miss.
 *
 * @param {string} toolUseId - The tool use ID
 * @returns {string|null} The cached signature, or null when missing or expired
 */
export function getCachedSignature(toolUseId) {
    const entry = toolUseId ? signatureCache.get(toolUseId) : undefined;
    if (!entry) {
        return null;
    }

    const ageMs = Date.now() - entry.timestamp;
    const isExpired = ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS;
    if (isExpired) {
        // Drop stale entries lazily so they do not linger after their TTL.
        signatureCache.delete(toolUseId);
        return null;
    }

    return entry.signature;
}
/**
 * Sweep the cache and delete every entry older than GEMINI_SIGNATURE_CACHE_TTL_MS.
 *
 * Intended to be invoked periodically so entries that are never looked up again
 * do not accumulate in memory.
 */
export function cleanupCache() {
    const sweepTime = Date.now();
    // Deleting the entry currently being visited is safe for a Map.
    signatureCache.forEach((entry, toolUseId) => {
        if (sweepTime - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
            signatureCache.delete(toolUseId);
        }
    });
}
/**
 * Report how many entries the signature cache currently holds (debugging aid).
 * Expired entries that have not been evicted yet are included in the count.
 *
 * @returns {number} Number of entries in the cache
 */
export function getCacheSize() {
    const { size } = signatureCache;
    return size;
}

View File

@@ -257,3 +257,225 @@ export function reorderAssistantContent(content) {
return reordered; return reordered;
} }
// ============================================================================
// Thinking Recovery Functions
// ============================================================================
/**
 * Tell whether a message carries at least one signed thinking block.
 *
 * A block counts only if `isThinkingPart` accepts it AND its `signature`
 * or `thoughtSignature` is at least MIN_SIGNATURE_LENGTH long. Unsigned thinking
 * blocks are ignored because they will be dropped later.
 *
 * NOTE(review): `thoughtSignature` is only examined on blocks that pass
 * `isThinkingPart`; whether that ever includes functionCall parts depends on
 * `isThinkingPart`, which is defined elsewhere in this file.
 *
 * @param {Object} message - Message to check (reads `content`, falling back to `parts`)
 * @returns {boolean} True if the message has a validly signed thinking block
 */
function messageHasValidThinking(message) {
    const blocks = message.content || message.parts || [];
    if (!Array.isArray(blocks)) {
        return false;
    }

    const isLongEnough = (sig) => Boolean(sig) && sig.length >= MIN_SIGNATURE_LENGTH;

    return blocks.some((entry) =>
        isThinkingPart(entry) &&
        (isLongEnough(entry.signature) || isLongEnough(entry.thoughtSignature))
    );
}
/**
 * Tell whether a message contains a tool invocation.
 *
 * Recognises both shapes: a block with `type === 'tool_use'` and a part with a
 * truthy `functionCall`. Non-array content never matches.
 *
 * @param {Object} message - Message to check (reads `content`, falling back to `parts`)
 * @returns {boolean} True if the message has at least one tool call block
 */
function messageHasToolUse(message) {
    const blocks = message.content || message.parts || [];
    if (!Array.isArray(blocks)) {
        return false;
    }

    const isToolCall = (entry) => entry.type === 'tool_use' || Boolean(entry.functionCall);
    return blocks.some(isToolCall);
}
/**
 * Tell whether a message contains a tool result.
 *
 * Recognises both shapes: a block with `type === 'tool_result'` and a part with
 * a truthy `functionResponse`. Non-array content never matches.
 *
 * @param {Object} message - Message to check (reads `content`, falling back to `parts`)
 * @returns {boolean} True if the message has at least one tool result block
 */
function messageHasToolResult(message) {
    const blocks = message.content || message.parts || [];
    if (!Array.isArray(blocks)) {
        return false;
    }

    const isToolOutcome = (entry) => entry.type === 'tool_result' || Boolean(entry.functionResponse);
    return blocks.some(isToolOutcome);
}
/**
 * Tell whether a message is an ordinary user message rather than a tool result.
 *
 * The message must have role 'user'. Array content qualifies when none of its
 * blocks is a tool result (`type === 'tool_result'` or a truthy
 * `functionResponse`); non-array content qualifies only when it is a string.
 *
 * @param {Object} message - Message to check (reads `role`, and `content` falling back to `parts`)
 * @returns {boolean} True if the message is plain user input
 */
function isPlainUserMessage(message) {
    if (message.role !== 'user') {
        return false;
    }

    const blocks = message.content || message.parts || [];
    if (Array.isArray(blocks)) {
        // Plain means: every block is something other than a tool result.
        return blocks.every((entry) => entry.type !== 'tool_result' && !entry.functionResponse);
    }

    return typeof blocks === 'string';
}
/**
 * Count the individual tool result blocks carried by a single message.
 * A block counts when `type === 'tool_result'` or it has a truthy `functionResponse`.
 *
 * @param {Object} message - Message to inspect (reads `content`, falling back to `parts`)
 * @returns {number} Number of tool result blocks; 0 for non-array content
 */
function countToolResultBlocks(message) {
    const blocks = message.content || message.parts || [];
    if (!Array.isArray(blocks)) return 0;
    return blocks.filter(block =>
        block.type === 'tool_result' || block.functionResponse
    ).length;
}

/**
 * Analyze conversation state to detect if we're in a corrupted state.
 * Only the LAST assistant/model message and the messages after it are examined.
 * Two situations are detected:
 * 1. Tool loop: the assistant message has tool_use and tool results follow it (normal flow)
 * 2. Interrupted tool: the assistant message has tool_use, no tool results follow,
 *    but a plain user message does (the user interrupted and sent a new message)
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {Object} State object:
 *   - inToolLoop {boolean}: last assistant message has tool_use and tool results follow
 *   - interruptedTool {boolean}: last assistant message has tool_use, no results, plain user message follows
 *   - turnHasThinking {boolean}: last assistant message has a validly signed thinking block
 *   - toolResultCount {number}: number of individual tool result blocks after the last assistant message
 *   - lastAssistantIdx {number}: index of the last assistant/model message, or -1 if there is none
 */
export function analyzeConversationState(messages) {
    // Every exit path returns the same shape so callers can destructure safely.
    const noAssistantState = () => ({
        inToolLoop: false,
        interruptedTool: false,
        turnHasThinking: false,
        toolResultCount: 0,
        lastAssistantIdx: -1
    });

    if (!Array.isArray(messages) || messages.length === 0) {
        return noAssistantState();
    }

    // Find the last assistant message
    let lastAssistantIdx = -1;
    for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i].role === 'assistant' || messages[i].role === 'model') {
            lastAssistantIdx = i;
            break;
        }
    }

    if (lastAssistantIdx === -1) {
        return noAssistantState();
    }

    const lastAssistant = messages[lastAssistantIdx];
    const hasToolUse = messageHasToolUse(lastAssistant);
    const hasThinking = messageHasValidThinking(lastAssistant);

    // Count tool results after the assistant message. Results of parallel tool
    // calls can share one message, so count blocks rather than messages.
    let toolResultCount = 0;
    let hasPlainUserMessageAfter = false;
    for (let i = lastAssistantIdx + 1; i < messages.length; i++) {
        toolResultCount += countToolResultBlocks(messages[i]);
        if (isPlainUserMessage(messages[i])) {
            hasPlainUserMessageAfter = true;
        }
    }

    // We're in a tool loop if: assistant has tool_use AND there are tool_results after
    const inToolLoop = hasToolUse && toolResultCount > 0;

    // We have an interrupted tool if: assistant has tool_use, NO tool_results,
    // but there IS a plain user message after (user interrupted and sent new message)
    const interruptedTool = hasToolUse && toolResultCount === 0 && hasPlainUserMessageAfter;

    return {
        inToolLoop,
        interruptedTool,
        turnHasThinking: hasThinking,
        toolResultCount,
        lastAssistantIdx
    };
}
/**
 * Decide whether the conversation must go through thinking recovery.
 *
 * Recovery is required when the last assistant turn has no valid thinking block
 * and the conversation is either inside a tool loop or has an interrupted tool
 * call, as reported by `analyzeConversationState`.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {boolean} True if thinking recovery is needed
 */
export function needsThinkingRecovery(messages) {
    const { inToolLoop, interruptedTool, turnHasThinking } = analyzeConversationState(messages);

    // A turn that still has signed thinking is intact; nothing to recover.
    if (turnHasThinking) {
        return false;
    }
    return inToolLoop || interruptedTool;
}
/**
 * Produce a copy of the message list with every thinking block removed.
 *
 * Messages whose content is not an array are passed through untouched. When
 * removing thinking leaves a message empty, a single empty text block is put in
 * its place (`{ type: 'text', text: '' }` for `content`, `{ text: '' }` for
 * `parts`). Input messages are not mutated.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {Array<Object>} New array of messages without thinking blocks
 */
function stripAllThinkingBlocks(messages) {
    const withoutThinking = (msg) => {
        const blocks = msg.content || msg.parts;
        if (!Array.isArray(blocks)) {
            return msg;
        }

        const kept = blocks.filter((entry) => !isThinkingPart(entry));

        // `blocks` came from `content` when that field is truthy, otherwise from `parts`.
        const usesContentField = Boolean(msg.content);
        const field = usesContentField ? 'content' : 'parts';
        const placeholder = usesContentField ? { type: 'text', text: '' } : { text: '' };

        return { ...msg, [field]: kept.length > 0 ? kept : [placeholder] };
    };

    return messages.map(withoutThinking);
}
/**
 * Repair a conversation whose thinking blocks are unusable by injecting
 * synthetic messages, so the model can begin a fresh turn.
 *
 * Behaviour by conversation state (from `analyzeConversationState`):
 * - Neither tool loop nor interrupted tool: the input array is returned unchanged.
 * - Interrupted tool: thinking blocks are stripped and a synthetic assistant
 *   message is inserted directly after the last assistant message.
 * - Tool loop: thinking blocks are stripped, then a synthetic assistant message
 *   and a synthetic user message are appended to the end.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {Array<Object>} The original array when no recovery applies, otherwise a new array with synthetic messages injected
 */
export function closeToolLoopForThinking(messages) {
    const {
        inToolLoop,
        interruptedTool,
        lastAssistantIdx,
        toolResultCount
    } = analyzeConversationState(messages);

    if (!(inToolLoop || interruptedTool)) {
        return messages;
    }

    // Work on a stripped copy; the caller's array is left untouched.
    const recovered = stripAllThinkingBlocks(messages);

    if (interruptedTool) {
        // Acknowledge the interruption right after the assistant's tool call,
        // ahead of the user's new message.
        const acknowledgement = {
            role: 'assistant',
            content: [{ type: 'text', text: '[Tool call was interrupted.]' }]
        };
        recovered.splice(lastAssistantIdx + 1, 0, acknowledgement);

        console.log('[ThinkingUtils] Applied thinking recovery for interrupted tool');
        return recovered;
    }

    // Tool loop: close the current turn, then open a new one.
    const completionNote = toolResultCount === 1
        ? '[Tool execution completed.]'
        : `[${toolResultCount} tool executions completed.]`;

    recovered.push(
        // Synthetic model message that completes the turn
        { role: 'assistant', content: [{ type: 'text', text: completionNote }] },
        // Synthetic user message that starts a fresh turn
        { role: 'user', content: [{ type: 'text', text: '[Continue]' }] }
    );

    console.log('[ThinkingUtils] Applied thinking recovery for tool loop');
    return recovered;
}