From df6625b531d0ff0816e7930af689f831dfe70c7c Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 18:01:21 +0530 Subject: [PATCH 01/10] fallback changes from PR #35 --- src/cloudcode/message-handler.js | 13 ++++++++++- src/cloudcode/streaming-handler.js | 14 +++++++++++- src/fallback-config.js | 36 ++++++++++++++++++++++++++++++ src/index.js | 22 +++++++++++++++--- src/server.js | 8 +++++-- 5 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 src/fallback-config.js diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js index 4491afc..beb6745 100644 --- a/src/cloudcode/message-handler.js +++ b/src/cloudcode/message-handler.js @@ -18,6 +18,7 @@ import { logger } from '../utils/logger.js'; import { parseResetTime } from './rate-limit-parser.js'; import { buildCloudCodeRequest, buildHeaders } from './request-builder.js'; import { parseThinkingSSEResponse } from './sse-parser.js'; +import { getFallbackModel } from '../fallback-config.js'; /** * Send a non-streaming request to Cloud Code with multi-account support @@ -32,7 +33,7 @@ import { parseThinkingSSEResponse } from './sse-parser.js'; * @returns {Promise} Anthropic-format response object * @throws {Error} If max retries exceeded or no accounts available */ -export async function sendMessage(anthropicRequest, accountManager) { +export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) { const model = anthropicRequest.model; const isThinking = isThinkingModel(model); @@ -76,6 +77,16 @@ export async function sendMessage(anthropicRequest, accountManager) { } if (!account) { + // Check if fallback is enabled and available + if (fallbackEnabled) { + const fallbackModel = getFallbackModel(model); + if (fallbackModel) { + logger.warn(`[CloudCode] All accounts exhausted for ${model}. 
Attempting fallback to ${fallbackModel}`); + // Retry with fallback model + const fallbackRequest = { ...anthropicRequest, model: fallbackModel }; + return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call + } + } throw new Error('No accounts available'); } } diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js index f3af687..f33945b 100644 --- a/src/cloudcode/streaming-handler.js +++ b/src/cloudcode/streaming-handler.js @@ -16,6 +16,7 @@ import { logger } from '../utils/logger.js'; import { parseResetTime } from './rate-limit-parser.js'; import { buildCloudCodeRequest, buildHeaders } from './request-builder.js'; import { streamSSEResponse } from './sse-streamer.js'; +import { getFallbackModel } from '../fallback-config.js'; /** @@ -31,7 +32,7 @@ import { streamSSEResponse } from './sse-streamer.js'; * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.) * @throws {Error} If max retries exceeded or no accounts available */ -export async function* sendMessageStream(anthropicRequest, accountManager) { +export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) { const model = anthropicRequest.model; // Retry loop with account failover @@ -74,6 +75,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager) { } if (!account) { + // Check if fallback is enabled and available + if (fallbackEnabled) { + const fallbackModel = getFallbackModel(model); + if (fallbackModel) { + logger.warn(`[CloudCode] All accounts exhausted for ${model}. 
/**
 * Model Fallback Configuration
 *
 * Defines fallback mappings for when a model's quota is exhausted across all accounts.
 * Enables graceful degradation to alternative models with similar capabilities.
 */

/**
 * Model fallback mapping.
 * Maps primary model ID to fallback model ID.
 */
export const MODEL_FALLBACK_MAP = {
    'gemini-3-pro-high': 'claude-sonnet-4-5-thinking',
    'gemini-3-pro-low': 'claude-sonnet-4-5',
    'claude-opus-4-5-thinking': 'gemini-3-pro-high',
    'claude-sonnet-4-5-thinking': 'gemini-3-pro-high',
    'claude-sonnet-4-5': 'gemini-3-pro-low'
};

/**
 * Check if a model has a fallback configured.
 *
 * Only the map's own keys count. The model ID comes from the request, so a
 * plain `in` check would wrongly report inherited names such as
 * "constructor" or "toString" as configured models.
 *
 * @param {string} model - Model ID to check
 * @returns {boolean} True if fallback exists
 */
export function hasFallback(model) {
    return Object.prototype.hasOwnProperty.call(MODEL_FALLBACK_MAP, model);
}

/**
 * Get fallback model for a given model ID.
 *
 * Looks the model up among the map's own keys only, so inherited
 * Object.prototype members are never returned as a "model".
 *
 * @param {string} model - Primary model ID
 * @returns {string|null} Fallback model ID or null if no fallback exists
 */
export function getFallbackModel(model) {
    if (!hasFallback(model)) {
        return null;
    }
    // Keep the original contract: an empty/falsy mapping still yields null.
    return MODEL_FALLBACK_MAP[model] || null;
}
logger.setDebug(isDebug); @@ -20,6 +21,13 @@ if (isDebug) { logger.debug('Debug mode enabled'); } +if (isFallbackEnabled) { + logger.info('Model fallback mode enabled'); +} + +// Export fallback flag for server to use +export const FALLBACK_ENABLED = isFallbackEnabled; + const PORT = process.env.PORT || DEFAULT_PORT; // Home directory for account storage @@ -40,14 +48,22 @@ app.listen(PORT, () => { if (!isDebug) { controlSection += '║ --debug Enable debug logging ║\n'; } + if (!isFallbackEnabled) { + controlSection += '║ --fallback Enable model fallback on quota exhaust ║\n'; + } controlSection += '║ Ctrl+C Stop server ║'; - // Build status section if debug mode is active + // Build status section if any modes are active let statusSection = ''; - if (isDebug) { + if (isDebug || isFallbackEnabled) { statusSection = '║ ║\n'; statusSection += '║ Active Modes: ║\n'; - statusSection += '║ ✓ Debug mode enabled ║\n'; + if (isDebug) { + statusSection += '║ ✓ Debug mode enabled ║\n'; + } + if (isFallbackEnabled) { + statusSection += '║ ✓ Model fallback enabled ║\n'; + } } logger.log(` diff --git a/src/server.js b/src/server.js index f0ccb5d..587b393 100644 --- a/src/server.js +++ b/src/server.js @@ -13,6 +13,10 @@ import { AccountManager } from './account-manager/index.js'; import { formatDuration } from './utils/helpers.js'; import { logger } from './utils/logger.js'; +// Parse fallback flag directly from command line args to avoid circular dependency +const args = process.argv.slice(2); +const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true'; + const app = express(); // Initialize account manager (will be fully initialized on first request or startup) @@ -595,7 +599,7 @@ app.post('/v1/messages', async (req, res) => { try { // Use the streaming generator with account manager - for await (const event of sendMessageStream(request, accountManager)) { + for await (const event of sendMessageStream(request, accountManager, FALLBACK_ENABLED)) { 
res.write(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`); // Flush after each event for real-time streaming if (res.flush) res.flush(); @@ -616,7 +620,7 @@ app.post('/v1/messages', async (req, res) => { } else { // Handle non-streaming response - const response = await sendMessage(request, accountManager); + const response = await sendMessage(request, accountManager, FALLBACK_ENABLED); res.json(response); } From ac9ec6b3584a2aa4aaf91695eeb867fdab076da3 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 22:01:57 +0530 Subject: [PATCH 02/10] Signature handling for fallback --- package.json | 3 +- src/cloudcode/sse-streamer.js | 7 +- src/format/content-converter.js | 23 +- src/format/request-converter.js | 14 +- src/format/response-converter.js | 10 +- src/format/signature-cache.js | 51 ++- src/format/thinking-utils.js | 23 +- tests/test-cross-model-thinking.cjs | 511 ++++++++++++++++++++++++++++ 8 files changed, 618 insertions(+), 24 deletions(-) create mode 100644 tests/test-cross-model-thinking.cjs diff --git a/package.json b/package.json index 815c9a7..365945e 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,8 @@ "test:streaming": "node tests/test-multiturn-thinking-tools-streaming.cjs", "test:interleaved": "node tests/test-interleaved-thinking.cjs", "test:images": "node tests/test-images.cjs", - "test:caching": "node tests/test-caching-streaming.cjs" + "test:caching": "node tests/test-caching-streaming.cjs", + "test:crossmodel": "node tests/test-cross-model-thinking.cjs" }, "keywords": [ "claude", diff --git a/src/cloudcode/sse-streamer.js b/src/cloudcode/sse-streamer.js index 8c8974e..eaf9136 100644 --- a/src/cloudcode/sse-streamer.js +++ b/src/cloudcode/sse-streamer.js @@ -6,8 +6,8 @@ */ import crypto from 'crypto'; -import { MIN_SIGNATURE_LENGTH } from '../constants.js'; -import { cacheSignature } from '../format/signature-cache.js'; +import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js'; +import { 
cacheSignature, cacheThinkingSignature } from '../format/signature-cache.js'; import { logger } from '../utils/logger.js'; /** @@ -110,6 +110,9 @@ export async function* streamSSEResponse(response, originalModel) { if (signature && signature.length >= MIN_SIGNATURE_LENGTH) { currentThinkingSignature = signature; + // Cache thinking signature with model family for cross-model compatibility + const modelFamily = getModelFamily(originalModel); + cacheThinkingSignature(signature, modelFamily); } yield { diff --git a/src/format/content-converter.js b/src/format/content-converter.js index 052eb0a..8599847 100644 --- a/src/format/content-converter.js +++ b/src/format/content-converter.js @@ -4,7 +4,7 @@ */ import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js'; -import { getCachedSignature } from './signature-cache.js'; +import { getCachedSignature, getCachedSignatureFamily } from './signature-cache.js'; import { logger } from '../utils/logger.js'; /** @@ -155,16 +155,31 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo // Add any images from the tool result as separate parts parts.push(...imageParts); } else if (block.type === 'thinking') { - // Handle thinking blocks - only those with valid signatures + // Handle thinking blocks with signature compatibility check if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) { - // Convert to Gemini format with signature + const signatureFamily = getCachedSignatureFamily(block.signature); + const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 
'gemini' : null; + + // Drop blocks with incompatible signatures for Gemini (cross-model switch) + if (isGeminiModel && signatureFamily && targetFamily && signatureFamily !== targetFamily) { + logger.debug(`[ContentConverter] Dropping incompatible ${signatureFamily} thinking for ${targetFamily} model`); + continue; + } + + // Drop blocks with unknown signature origin for Gemini (cold cache - safe default) + if (isGeminiModel && !signatureFamily && targetFamily) { + logger.debug(`[ContentConverter] Dropping thinking with unknown signature origin`); + continue; + } + + // Compatible - convert to Gemini format with signature parts.push({ text: block.thinking, thought: true, thoughtSignature: block.signature }); } - // Unsigned thinking blocks are dropped upstream + // Unsigned thinking blocks are dropped (existing behavior) } } diff --git a/src/format/request-converter.js b/src/format/request-converter.js index 17e67ed..7343439 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -77,12 +77,14 @@ export function convertAnthropicToGoogle(anthropicRequest) { } } - // Apply thinking recovery for Gemini thinking models when needed - // This handles corrupted tool loops where thinking blocks are stripped - // Claude models handle this differently and don't need this recovery + // Apply thinking recovery for thinking models when needed + // - Gemini: needs recovery for tool loops/interrupted tools (stripped thinking) + // - Claude: needs recovery ONLY when cross-model (incompatible Gemini signatures will be dropped) let processedMessages = messages; - if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) { - logger.debug('[RequestConverter] Applying thinking recovery for Gemini'); + const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 
'gemini' : null; + + if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) { + logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`); processedMessages = closeToolLoopForThinking(messages); } @@ -107,7 +109,7 @@ export function convertAnthropicToGoogle(anthropicRequest) { // This happens when all thinking blocks are filtered out (unsigned) if (parts.length === 0) { logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder'); - parts.push({ text: '' }); + parts.push({ text: '.' }); } const content = { diff --git a/src/format/response-converter.js b/src/format/response-converter.js index 59b919b..c58d57c 100644 --- a/src/format/response-converter.js +++ b/src/format/response-converter.js @@ -4,8 +4,8 @@ */ import crypto from 'crypto'; -import { MIN_SIGNATURE_LENGTH } from '../constants.js'; -import { cacheSignature } from './signature-cache.js'; +import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js'; +import { cacheSignature, cacheThinkingSignature } from './signature-cache.js'; /** * Convert Google Generative AI response to Anthropic Messages API format @@ -33,6 +33,12 @@ export function convertGoogleToAnthropic(googleResponse, model) { if (part.thought === true) { const signature = part.thoughtSignature || ''; + // Cache thinking signature with model family for cross-model compatibility + if (signature && signature.length >= MIN_SIGNATURE_LENGTH) { + const modelFamily = getModelFamily(model); + cacheThinkingSignature(signature, modelFamily); + } + // Include thinking blocks in the response for Claude Code anthropicContent.push({ type: 'thinking', diff --git a/src/format/signature-cache.js b/src/format/signature-cache.js index 49154ee..944c821 100644 --- a/src/format/signature-cache.js +++ b/src/format/signature-cache.js @@ -5,11 +5,15 @@ * Gemini models require thoughtSignature on tool calls, but Claude Code * strips non-standard fields. 
/**
 * Remember which model family produced a thinking signature.
 *
 * Signatures that are missing or shorter than MIN_SIGNATURE_LENGTH are
 * ignored. An existing entry for the same signature is overwritten with a
 * fresh timestamp.
 *
 * @param {string} signature - The thinking signature to cache
 * @param {string} modelFamily - The model family ('claude' or 'gemini')
 */
export function cacheThinkingSignature(signature, modelFamily) {
    if (!signature) {
        return;
    }
    if (signature.length < MIN_SIGNATURE_LENGTH) {
        return;
    }

    const record = { modelFamily, timestamp: Date.now() };
    thinkingSignatureCache.set(signature, record);
}

/**
 * Look up the model family recorded for a thinking signature.
 *
 * Entries older than GEMINI_SIGNATURE_CACHE_TTL_MS are evicted on access
 * and treated as missing.
 *
 * @param {string} signature - The signature to look up
 * @returns {string|null} 'claude', 'gemini', or null if not found/expired
 */
export function getCachedSignatureFamily(signature) {
    if (!signature) {
        return null;
    }

    const cached = thinkingSignatureCache.get(signature);
    if (!cached) {
        return null;
    }

    // Lazy TTL enforcement: stale entries are removed when they are read.
    const ageMs = Date.now() - cached.timestamp;
    if (ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS) {
        thinkingSignatureCache.delete(signature);
        return null;
    }

    return cached.modelFamily;
}
/**
 * Decide whether the conversation has to go through thinking recovery
 * before it is sent to the target model family.
 *
 * - 'claude': recovery is requested whenever the analyzed state reports no
 *   thinking for the current turn.
 * - anything else (Gemini / default): recovery is requested only when the
 *   conversation is in a tool loop or has an interrupted tool, and the
 *   current turn has no thinking.
 *
 * NOTE(review): targetFamily is forwarded to analyzeConversationState;
 * confirm that helper accepts and uses a second argument, otherwise the
 * "compatible thinking" distinction for Claude is not actually applied.
 *
 * @param {Array} messages - Array of messages
 * @param {string} targetFamily - Target model family ('claude' or 'gemini')
 * @returns {boolean} True if thinking recovery is needed
 */
export function needsThinkingRecovery(messages, targetFamily = null) {
    const {
        inToolLoop,
        interruptedTool,
        turnHasThinking
    } = analyzeConversationState(messages, targetFamily);
    const lacksThinking = !turnHasThinking;

    return targetFamily === 'claude'
        ? lacksThinking
        : (inToolLoop || interruptedTool) && lacksThinking;
}
/**
 * Scenario: start a tool-using conversation on the Claude model, then replay
 * the assistant turn (thinking + text + tool_use) to the Gemini model together
 * with a tool_result.
 *
 * The scenario passes when the second request completes without throwing and
 * its analyzed content contains text, thinking, or a tool use. It is reported
 * as skipped when the first turn produced no tool use, because there is then
 * no tool_result to send back.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testClaudeToGemini() {
    console.log('='.repeat(60));
    console.log('TEST: Claude → Gemini Cross-Model Switch');
    console.log('Simulates starting with Claude, then switching to Gemini');
    console.log('='.repeat(60));
    console.log('');

    const claudeConfig = getModelConfig('claude');
    const geminiConfig = getModelConfig('gemini');

    // TURN 1: Get response from Claude with thinking + tool use
    console.log('TURN 1: Request to Claude (get thinking signature)');
    console.log('-'.repeat(40));

    const turn1Messages = [
        { role: 'user', content: 'Run the command "ls -la" to list files.' }
    ];

    const turn1Result = await streamRequest({
        model: CLAUDE_MODEL,
        max_tokens: claudeConfig.max_tokens,
        stream: true,
        tools,
        thinking: claudeConfig.thinking,
        messages: turn1Messages
    });

    const turn1Content = analyzeContent(turn1Result.content);
    console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
    console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
    console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);

    // Without a tool call there is nothing to answer in turn 2, so the
    // scenario cannot be exercised; report it as skipped rather than failed.
    if (!turn1Content.hasToolUse) {
        console.log(' SKIP: No tool use in turn 1');
        return { passed: false, skipped: true };
    }

    // Extract thinking and tool_use for the assistant message
    // (only the first thinking block and first text block are replayed)
    const assistantContent = [];
    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
        assistantContent.push({
            type: 'thinking',
            thinking: turn1Content.thinking[0].thinking,
            signature: turn1Content.thinking[0].signature || ''
        });
    }
    if (turn1Content.hasText && turn1Content.text[0]) {
        assistantContent.push({
            type: 'text',
            text: turn1Content.text[0].text
        });
    }
    for (const tool of turn1Content.toolUse) {
        assistantContent.push({
            type: 'tool_use',
            id: tool.id,
            name: tool.name,
            input: tool.input
        });
    }

    // Logged for diagnostics only; 0 means no thinking signature came back.
    const signatureLength = turn1Content.thinking[0]?.signature?.length || 0;
    console.log(` Claude signature length: ${signatureLength}`);

    // TURN 2: Switch to Gemini with Claude's thinking signature in history
    console.log('\nTURN 2: Request to Gemini (with Claude thinking in history)');
    console.log('-'.repeat(40));

    const turn2Messages = [
        { role: 'user', content: 'Run the command "ls -la" to list files.' },
        { role: 'assistant', content: assistantContent },
        {
            role: 'user',
            content: [{
                type: 'tool_result',
                // Only the first tool call from turn 1 is answered.
                tool_use_id: turn1Content.toolUse[0].id,
                content: 'total 16\ndrwxr-xr-x 5 user staff 160 Jan 1 12:00 .\ndrwxr-xr-x 3 user staff 96 Jan 1 12:00 ..\n-rw-r--r-- 1 user staff 100 Jan 1 12:00 file.txt'
            }]
        }
    ];

    try {
        const turn2Result = await streamRequest({
            model: GEMINI_MODEL,
            max_tokens: geminiConfig.max_tokens,
            stream: true,
            tools,
            thinking: geminiConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
        console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
        console.log(` Error: NO`);

        // Success if we got any response without error
        const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        // A thrown request is recorded as a failure of this scenario
        // instead of aborting the whole suite.
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
} + ]; + + const turn1Result = await streamRequest({ + model: GEMINI_MODEL, + max_tokens: geminiConfig.max_tokens, + stream: true, + tools, + thinking: geminiConfig.thinking, + messages: turn1Messages + }); + + const turn1Content = analyzeContent(turn1Result.content); + console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`); + console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`); + console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`); + + if (!turn1Content.hasToolUse) { + console.log(' SKIP: No tool use in turn 1'); + return { passed: false, skipped: true }; + } + + // Extract content for the assistant message + const assistantContent = []; + if (turn1Content.hasThinking && turn1Content.thinking[0]) { + assistantContent.push({ + type: 'thinking', + thinking: turn1Content.thinking[0].thinking, + signature: turn1Content.thinking[0].signature || '' + }); + } + if (turn1Content.hasText && turn1Content.text[0]) { + assistantContent.push({ + type: 'text', + text: turn1Content.text[0].text + }); + } + for (const tool of turn1Content.toolUse) { + const toolBlock = { + type: 'tool_use', + id: tool.id, + name: tool.name, + input: tool.input + }; + // Include thoughtSignature if present (Gemini puts it on tool_use) + if (tool.thoughtSignature) { + toolBlock.thoughtSignature = tool.thoughtSignature; + } + assistantContent.push(toolBlock); + } + + const thinkingSigLength = turn1Content.thinking[0]?.signature?.length || 0; + const toolUseSigLength = turn1Content.toolUse[0]?.thoughtSignature?.length || 0; + console.log(` Gemini thinking signature length: ${thinkingSigLength}`); + console.log(` Gemini tool_use signature length: ${toolUseSigLength}`); + + // TURN 2: Switch to Claude with Gemini's thinking signature in history + console.log('\nTURN 2: Request to Claude (with Gemini thinking in history)'); + console.log('-'.repeat(40)); + console.log(` Assistant content being sent: ${JSON.stringify(assistantContent).substring(0, 400)}`); 
/**
 * Control scenario: run both turns of a tool-using conversation on the
 * Claude model, replaying the first turn's thinking, text and tool_use
 * blocks in the second request.
 *
 * The scenario passes when the second request completes without throwing and
 * its analyzed content contains text or thinking. It is reported as skipped
 * when the first turn produced no tool use.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testSameModelContinuation() {
    console.log('\n' + '='.repeat(60));
    console.log('TEST: Same Model Continuation - Claude (Control Test)');
    console.log('Verifies same-model multi-turn still works');
    console.log('='.repeat(60));
    console.log('');

    const claudeConfig = getModelConfig('claude');

    // TURN 1: Get response from Claude
    console.log('TURN 1: Request to Claude');
    console.log('-'.repeat(40));

    const turn1Messages = [
        { role: 'user', content: 'Run "echo hello" command.' }
    ];

    const turn1Result = await streamRequest({
        model: CLAUDE_MODEL,
        max_tokens: claudeConfig.max_tokens,
        stream: true,
        tools,
        thinking: claudeConfig.thinking,
        messages: turn1Messages
    });

    const turn1Content = analyzeContent(turn1Result.content);
    console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
    console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
    console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);

    // No tool call means there is no tool_result to send in turn 2;
    // report the scenario as skipped rather than failed.
    if (!turn1Content.hasToolUse) {
        console.log(' SKIP: No tool use in turn 1');
        return { passed: false, skipped: true };
    }

    // Build assistant message
    // (only the first thinking block and first text block are replayed)
    const assistantContent = [];
    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
        assistantContent.push({
            type: 'thinking',
            thinking: turn1Content.thinking[0].thinking,
            signature: turn1Content.thinking[0].signature || ''
        });
    }
    if (turn1Content.hasText && turn1Content.text[0]) {
        assistantContent.push({
            type: 'text',
            text: turn1Content.text[0].text
        });
    }
    for (const tool of turn1Content.toolUse) {
        assistantContent.push({
            type: 'tool_use',
            id: tool.id,
            name: tool.name,
            input: tool.input
        });
    }

    // TURN 2: Continue with same model
    console.log('\nTURN 2: Continue with Claude (same model)');
    console.log('-'.repeat(40));

    const turn2Messages = [
        { role: 'user', content: 'Run "echo hello" command.' },
        { role: 'assistant', content: assistantContent },
        {
            role: 'user',
            content: [{
                type: 'tool_result',
                // Only the first tool call from turn 1 is answered.
                tool_use_id: turn1Content.toolUse[0].id,
                content: 'hello'
            }]
        }
    ];

    try {
        const turn2Result = await streamRequest({
            model: CLAUDE_MODEL,
            max_tokens: claudeConfig.max_tokens,
            stream: true,
            tools,
            thinking: claudeConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
        console.log(` Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`);
        console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
        console.log(` Error: NO`);

        // For same model, we should preserve thinking with valid signature
        // (the check itself only requires text or thinking in the reply)
        const passed = turn2Content.hasText || turn2Content.hasThinking;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        // A thrown request is recorded as a failure of this scenario
        // instead of aborting the whole suite.
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
'YES' : 'NO'}`); + + if (!turn1Content.hasToolUse) { + console.log(' SKIP: No tool use in turn 1'); + return { passed: false, skipped: true }; + } + + // Build assistant message + const assistantContent = []; + if (turn1Content.hasThinking && turn1Content.thinking[0]) { + assistantContent.push({ + type: 'thinking', + thinking: turn1Content.thinking[0].thinking, + signature: turn1Content.thinking[0].signature || '' + }); + } + if (turn1Content.hasText && turn1Content.text[0]) { + assistantContent.push({ + type: 'text', + text: turn1Content.text[0].text + }); + } + for (const tool of turn1Content.toolUse) { + const toolBlock = { + type: 'tool_use', + id: tool.id, + name: tool.name, + input: tool.input + }; + // Include thoughtSignature if present (Gemini puts it on tool_use) + if (tool.thoughtSignature) { + toolBlock.thoughtSignature = tool.thoughtSignature; + } + assistantContent.push(toolBlock); + } + + // TURN 2: Continue with same model + console.log('\nTURN 2: Continue with Gemini (same model)'); + console.log('-'.repeat(40)); + + const turn2Messages = [ + { role: 'user', content: 'Run "echo world" command.' }, + { role: 'assistant', content: assistantContent }, + { + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: turn1Content.toolUse[0].id, + content: 'world' + }] + } + ]; + + try { + const turn2Result = await streamRequest({ + model: GEMINI_MODEL, + max_tokens: geminiConfig.max_tokens, + stream: true, + tools, + thinking: geminiConfig.thinking, + messages: turn2Messages + }); + + const turn2Content = analyzeContent(turn2Result.content); + console.log(` Response received: YES`); + console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`); + console.log(` Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`); + console.log(` Text: ${turn2Content.hasText ? 
'YES' : 'NO'}`); + console.log(` Error: NO`); + + // For same model, we should get a response + const passed = turn2Content.hasText || turn2Content.hasThinking; + console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`); + return { passed }; + } catch (error) { + console.log(` Error: ${error.message}`); + console.log(` Result: FAIL`); + return { passed: false, error: error.message }; + } +} + +async function main() { + console.log('\n'); + console.log('╔' + '═'.repeat(58) + '╗'); + console.log('║' + ' CROSS-MODEL THINKING SIGNATURE TEST SUITE '.padEnd(58) + '║'); + console.log('║' + ' Tests switching between Claude and Gemini '.padEnd(58) + '║'); + console.log('╚' + '═'.repeat(58) + '╝'); + console.log('\n'); + + const results = []; + + // Test 1: Claude → Gemini + const claudeToGemini = await testClaudeToGemini(); + results.push({ name: 'Claude → Gemini', ...claudeToGemini }); + + // Test 2: Gemini → Claude + const geminiToClaude = await testGeminiToClaude(); + results.push({ name: 'Gemini → Claude', ...geminiToClaude }); + + // Test 3: Same model Claude (control) + const sameModelClaude = await testSameModelContinuation(); + results.push({ name: 'Same Model (Claude → Claude)', ...sameModelClaude }); + + // Test 4: Same model Gemini (control) + const sameModelGemini = await testSameModelContinuationGemini(); + results.push({ name: 'Same Model (Gemini → Gemini)', ...sameModelGemini }); + + // Summary + console.log('\n' + '='.repeat(60)); + console.log('SUMMARY'); + console.log('='.repeat(60)); + + let allPassed = true; + for (const result of results) { + const status = result.skipped ? 'SKIP' : (result.passed ? 'PASS' : 'FAIL'); + console.log(` [${status}] ${result.name}`); + if (!result.passed && !result.skipped) allPassed = false; + } + + console.log('\n' + '='.repeat(60)); + console.log(`FINAL RESULT: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`); + console.log('='.repeat(60)); + + process.exit(allPassed ? 
0 : 1); +} + +main().catch(err => { + console.error('Test error:', err); + process.exit(1); +}); From 602d6ca0f8f1dbdac1c769b47facfe6a83e7bd6f Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 22:05:16 +0530 Subject: [PATCH 03/10] move fallback map to constants --- src/constants.js | 12 +++++++++++- src/fallback-config.js | 17 +++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/constants.js b/src/constants.js index d957923..7c4e75a 100644 --- a/src/constants.js +++ b/src/constants.js @@ -144,6 +144,15 @@ export const OAUTH_CONFIG = { }; export const OAUTH_REDIRECT_URI = `http://localhost:${OAUTH_CONFIG.callbackPort}/oauth-callback`; +// Model fallback mapping - maps primary model to fallback when quota exhausted +export const MODEL_FALLBACK_MAP = { + 'gemini-3-pro-high': 'claude-sonnet-4-5-thinking', + 'gemini-3-pro-low': 'claude-sonnet-4-5', + 'claude-opus-4-5-thinking': 'gemini-3-pro-high', + 'claude-sonnet-4-5-thinking': 'gemini-3-pro-high', + 'claude-sonnet-4-5': 'gemini-3-pro-low' +}; + export default { ANTIGRAVITY_ENDPOINT_FALLBACKS, ANTIGRAVITY_HEADERS, @@ -165,5 +174,6 @@ export default { getModelFamily, isThinkingModel, OAUTH_CONFIG, - OAUTH_REDIRECT_URI + OAUTH_REDIRECT_URI, + MODEL_FALLBACK_MAP }; diff --git a/src/fallback-config.js b/src/fallback-config.js index 880e5ac..894cdee 100644 --- a/src/fallback-config.js +++ b/src/fallback-config.js @@ -1,21 +1,14 @@ /** * Model Fallback Configuration - * + * * Defines fallback mappings for when a model's quota is exhausted across all accounts. * Enables graceful degradation to alternative models with similar capabilities. 
*/ -/** - * Model fallback mapping - * Maps primary model ID to fallback model ID - */ -export const MODEL_FALLBACK_MAP = { - 'gemini-3-pro-high': 'claude-sonnet-4-5-thinking', - 'gemini-3-pro-low': 'claude-sonnet-4-5', - 'claude-opus-4-5-thinking': 'gemini-3-pro-high', - 'claude-sonnet-4-5-thinking': 'gemini-3-pro-high', - 'claude-sonnet-4-5': 'gemini-3-pro-low' -}; +import { MODEL_FALLBACK_MAP } from './constants.js'; + +// Re-export for convenience +export { MODEL_FALLBACK_MAP }; /** * Get fallback model for a given model ID From b7286059eee5455553b37bcf654ad3b97c36fcc1 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 22:13:35 +0530 Subject: [PATCH 04/10] remove targetFamily from analyzeConversationState --- src/format/thinking-utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 9358311..83fa902 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -395,7 +395,7 @@ export function analyzeConversationState(messages) { * @returns {boolean} True if thinking recovery is needed */ export function needsThinkingRecovery(messages, targetFamily = null) { - const state = analyzeConversationState(messages, targetFamily); + const state = analyzeConversationState(messages); if (targetFamily === 'claude') { // Claude: only check if thinking is valid/compatible From 668c7aef26f7b44034a0711dcd552c2bd9bb660d Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 22:38:46 +0530 Subject: [PATCH 05/10] correct else if condition for state.inToolLoop --- src/format/thinking-utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 83fa902..af9b3de 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -464,7 +464,7 @@ export function closeToolLoopForThinking(messages) { }); logger.debug('[ThinkingUtils] Applied thinking recovery 
for interrupted tool'); - } else { + } else if (state.inToolLoop) { // For tool loops: add synthetic messages to close the loop const syntheticText = state.toolResultCount === 1 ? '[Tool execution completed.]' From dc65499c49b561cf8affa01be0b03eee10fdac21 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 23:17:38 +0530 Subject: [PATCH 06/10] Preserve valid thinking blocks during recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of stripping all thinking blocks during thinking recovery, now only strips invalid or incompatible blocks. Uses signature cache to validate family compatibility for cross-model fallback scenarios. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/format/request-converter.js | 2 +- src/format/thinking-utils.js | 38 +++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/format/request-converter.js b/src/format/request-converter.js index 7343439..fa2d920 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -85,7 +85,7 @@ export function convertAnthropicToGoogle(anthropicRequest) { if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) { logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`); - processedMessages = closeToolLoopForThinking(messages); + processedMessages = closeToolLoopForThinking(messages, targetFamily); } // Convert messages to contents, then filter unsigned thinking blocks diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index af9b3de..9017a8c 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -4,6 +4,7 @@ */ import { MIN_SIGNATURE_LENGTH } from '../constants.js'; +import { getCachedSignatureFamily } from './signature-cache.js'; import { logger } from '../utils/logger.js'; /** @@ -407,18 +408,40 @@ export function 
needsThinkingRecovery(messages, targetFamily = null) { } /** - * Strip all thinking blocks from messages. + * Strip invalid or incompatible thinking blocks from messages. * Used before injecting synthetic messages for recovery. + * Keeps valid thinking blocks to preserve context from previous turns. * * @param {Array} messages - Array of messages - * @returns {Array} Messages with all thinking blocks removed + * @param {string} targetFamily - Target model family ('claude' or 'gemini') + * @returns {Array} Messages with invalid thinking blocks removed */ -function stripAllThinkingBlocks(messages) { +function stripInvalidThinkingBlocks(messages, targetFamily = null) { return messages.map(msg => { const content = msg.content || msg.parts; if (!Array.isArray(content)) return msg; - const filtered = content.filter(block => !isThinkingPart(block)); + const filtered = content.filter(block => { + // Keep non-thinking blocks + if (!isThinkingPart(block)) return true; + + // Check generic validity (has signature of sufficient length) + if (!hasValidSignature(block)) return false; + + // Check family compatibility if targetFamily is provided + if (targetFamily) { + const signature = block.thought === true ? block.thoughtSignature : block.signature; + const signatureFamily = getCachedSignatureFamily(signature); + + // Strict validation: If we don't know the family (cache miss) or it doesn't match, + // we drop it. We don't assume validity for unknown signatures. + if (signatureFamily !== targetFamily) { + return false; + } + } + + return true; + }); if (msg.content) { return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] }; @@ -439,16 +462,17 @@ function stripAllThinkingBlocks(messages) { * loop and allow the model to continue. 
* * @param {Array} messages - Array of messages + * @param {string} targetFamily - Target model family ('claude' or 'gemini') * @returns {Array} Modified messages with synthetic messages injected */ -export function closeToolLoopForThinking(messages) { +export function closeToolLoopForThinking(messages, targetFamily = null) { const state = analyzeConversationState(messages); // Handle neither tool loop nor interrupted tool if (!state.inToolLoop && !state.interruptedTool) return messages; - // Strip all thinking blocks - let modified = stripAllThinkingBlocks(messages); + // Strip only invalid/incompatible thinking blocks (keep valid ones) + let modified = stripInvalidThinkingBlocks(messages, targetFamily); if (state.interruptedTool) { // For interrupted tools: just strip thinking and add a synthetic assistant message From 12e427e9d5b7b6ffc8e992caced7e9f9833ebe2f Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 23:24:52 +0530 Subject: [PATCH 07/10] Fix needsThinkingRecovery to require tool loop context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/format/request-converter.js | 2 +- src/format/thinking-utils.js | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/format/request-converter.js b/src/format/request-converter.js index fa2d920..98a378c 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -83,7 +83,7 @@ export function convertAnthropicToGoogle(anthropicRequest) { let processedMessages = messages; const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 
'gemini' : null; - if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) { + if (isThinking && targetFamily && needsThinkingRecovery(messages)) { logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`); processedMessages = closeToolLoopForThinking(messages, targetFamily); } diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 9017a8c..3dcf883 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -388,23 +388,24 @@ export function analyzeConversationState(messages) { /** * Check if conversation needs thinking recovery. * - * For Gemini: recovery needed when (tool loop OR interrupted tool) AND no valid thinking - * For Claude: recovery needed when no valid compatible thinking (cross-model detection) + * Recovery is only needed when: + * 1. We're in a tool loop or have an interrupted tool, AND + * 2. No valid thinking blocks exist in the current turn + * + * Cross-model signature compatibility is handled by stripInvalidThinkingBlocks + * during recovery (not here). 
* * @param {Array} messages - Array of messages - * @param {string} targetFamily - Target model family ('claude' or 'gemini') * @returns {boolean} True if thinking recovery is needed */ -export function needsThinkingRecovery(messages, targetFamily = null) { +export function needsThinkingRecovery(messages) { const state = analyzeConversationState(messages); - if (targetFamily === 'claude') { - // Claude: only check if thinking is valid/compatible - return !state.turnHasThinking; - } + // Recovery is only needed in tool loops or interrupted tools + if (!state.inToolLoop && !state.interruptedTool) return false; - // Gemini (default): check tool loop/interrupted AND no thinking - return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking; + // Need recovery if no valid thinking blocks exist + return !state.turnHasThinking; } /** From 53f8d7f6cc626b43efd2de41af00941137eac5a2 Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sat, 3 Jan 2026 23:29:21 +0530 Subject: [PATCH 08/10] Add debug logging when stripping thinking blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/format/thinking-utils.js | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 3dcf883..06bca85 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -418,7 +418,9 @@ export function needsThinkingRecovery(messages) { * @returns {Array} Messages with invalid thinking blocks removed */ function stripInvalidThinkingBlocks(messages, targetFamily = null) { - return messages.map(msg => { + let strippedCount = 0; + + const result = messages.map(msg => { const content = msg.content || msg.parts; if (!Array.isArray(content)) return msg; @@ -427,7 +429,10 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) { if 
(!isThinkingPart(block)) return true; // Check generic validity (has signature of sufficient length) - if (!hasValidSignature(block)) return false; + if (!hasValidSignature(block)) { + strippedCount++; + return false; + } // Check family compatibility if targetFamily is provided if (targetFamily) { @@ -437,6 +442,7 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) { // Strict validation: If we don't know the family (cache miss) or it doesn't match, // we drop it. We don't assume validity for unknown signatures. if (signatureFamily !== targetFamily) { + strippedCount++; return false; } } @@ -451,6 +457,12 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) { } return msg; }); + + if (strippedCount > 0) { + logger.debug(`[ThinkingUtils] Stripped ${strippedCount} invalid/incompatible thinking block(s)`); + } + + return result; } /** From 141558dd624805642fa247553e9f8d340beece9e Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sun, 4 Jan 2026 00:11:14 +0530 Subject: [PATCH 09/10] Improve cross-model thinking handling and add gemini-3-flash fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add gemini-3-flash to MODEL_FALLBACK_MAP for completeness - Add hasGeminiHistory() to detect Gemini→Claude cross-model switch - Trigger recovery for Claude only when Gemini history detected - Remove unnecessary thinking block filtering for Claude-only conversations - Add comments explaining '.' 
placeholder usage - Remove unused filterUnsignedThinkingFromMessages function 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/constants.js | 7 ++++--- src/format/request-converter.js | 22 +++++++++++++++------- src/format/thinking-utils.js | 26 +++++++++++++++++++++----- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/constants.js b/src/constants.js index 7c4e75a..06f9af7 100644 --- a/src/constants.js +++ b/src/constants.js @@ -146,11 +146,12 @@ export const OAUTH_REDIRECT_URI = `http://localhost:${OAUTH_CONFIG.callbackPort} // Model fallback mapping - maps primary model to fallback when quota exhausted export const MODEL_FALLBACK_MAP = { - 'gemini-3-pro-high': 'claude-sonnet-4-5-thinking', + 'gemini-3-pro-high': 'claude-opus-4-5-thinking', 'gemini-3-pro-low': 'claude-sonnet-4-5', + 'gemini-3-flash': 'claude-sonnet-4-5-thinking', 'claude-opus-4-5-thinking': 'gemini-3-pro-high', - 'claude-sonnet-4-5-thinking': 'gemini-3-pro-high', - 'claude-sonnet-4-5': 'gemini-3-pro-low' + 'claude-sonnet-4-5-thinking': 'gemini-3-flash', + 'claude-sonnet-4-5': 'gemini-3-flash' }; export default { diff --git a/src/format/request-converter.js b/src/format/request-converter.js index 98a378c..3e97406 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -15,6 +15,7 @@ import { removeTrailingThinkingBlocks, reorderAssistantContent, filterUnsignedThinkingBlocks, + hasGeminiHistory, needsThinkingRecovery, closeToolLoopForThinking } from './thinking-utils.js'; @@ -77,15 +78,20 @@ export function convertAnthropicToGoogle(anthropicRequest) { } } - // Apply thinking recovery for thinking models when needed - // - Gemini: needs recovery for tool loops/interrupted tools (stripped thinking) - // - Claude: needs recovery ONLY when cross-model (incompatible Gemini signatures will be dropped) + // Apply thinking recovery for Gemini thinking models when needed + // Gemini needs recovery for tool 
loops/interrupted tools (stripped thinking) let processedMessages = messages; - const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null; - if (isThinking && targetFamily && needsThinkingRecovery(messages)) { - logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`); - processedMessages = closeToolLoopForThinking(messages, targetFamily); + if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) { + logger.debug('[RequestConverter] Applying thinking recovery for Gemini'); + processedMessages = closeToolLoopForThinking(messages, 'gemini'); + } + + // For Claude: apply recovery only for cross-model (Gemini→Claude) switch + // Detected by checking if history has Gemini-style tool_use with thoughtSignature + if (isClaudeModel && isThinking && hasGeminiHistory(messages) && needsThinkingRecovery(messages)) { + logger.debug('[RequestConverter] Applying thinking recovery for Claude (cross-model from Gemini)'); + processedMessages = closeToolLoopForThinking(messages, 'claude'); } // Convert messages to contents, then filter unsigned thinking blocks @@ -108,6 +114,8 @@ export function convertAnthropicToGoogle(anthropicRequest) { // SAFETY: Google API requires at least one part per content message // This happens when all thinking blocks are filtered out (unsigned) if (parts.length === 0) { + // Use '.' instead of '' because claude models reject empty text parts. + // A single period is invisible in practice but satisfies the API requirement. logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder'); parts.push({ text: '.' 
}); } diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 06bca85..14ce530 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -27,6 +27,21 @@ export function hasValidSignature(part) { return typeof signature === 'string' && signature.length >= MIN_SIGNATURE_LENGTH; } +/** + * Check if conversation history contains Gemini-style messages. + * Gemini puts thoughtSignature on tool_use blocks, Claude puts signature on thinking blocks. + * @param {Array} messages - Array of messages + * @returns {boolean} True if any tool_use has thoughtSignature (Gemini pattern) + */ +export function hasGeminiHistory(messages) { + return messages.some(msg => + Array.isArray(msg.content) && + msg.content.some(block => + block.type === 'tool_use' && block.thoughtSignature !== undefined + ) + ); +} + /** * Sanitize a thinking part by keeping only allowed fields */ @@ -434,14 +449,14 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) { return false; } - // Check family compatibility if targetFamily is provided - if (targetFamily) { + // Check family compatibility only for Gemini targets + // Claude can validate its own signatures, so we don't drop for Claude + if (targetFamily === 'gemini') { const signature = block.thought === true ? block.thoughtSignature : block.signature; const signatureFamily = getCachedSignatureFamily(signature); - // Strict validation: If we don't know the family (cache miss) or it doesn't match, - // we drop it. We don't assume validity for unknown signatures. - if (signatureFamily !== targetFamily) { + // For Gemini: drop unknown or mismatched signatures + if (!signatureFamily || signatureFamily !== targetFamily) { strippedCount++; return false; } @@ -450,6 +465,7 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) { return true; }); + // Use '.' instead of '' because claude models reject empty text parts if (msg.content) { return { ...msg, content: filtered.length > 0 ? 
filtered : [{ type: 'text', text: '.' }] }; } else if (msg.parts) { From e0b3f9077703fc9cf36f9d11b989cfadb10f9d3f Mon Sep 17 00:00:00 2001 From: Badri Narayanan S Date: Sun, 4 Jan 2026 00:19:35 +0530 Subject: [PATCH 10/10] docs: update CLAUDE.md with model fallback and cross-model features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add --fallback and --debug startup flags to commands section - Add test:crossmodel script for cross-model thinking tests - Document fallback-config.js module in directory structure - Add Model Fallback section explaining fallback behavior - Add Cross-Model Thinking Signatures section explaining compatibility - Update constants section with MODEL_FALLBACK_MAP 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CLAUDE.md | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 443c3ff..70c7fb2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -17,6 +17,12 @@ npm install # Start server (runs on port 8080) npm start +# Start with model fallback enabled (falls back to alternate model when quota exhausted) +npm start -- --fallback + +# Start with debug logging +npm start -- --debug + # Start with file watching for development npm run dev @@ -36,6 +42,7 @@ npm run test:streaming # Streaming SSE events npm run test:interleaved # Interleaved thinking npm run test:images # Image processing npm run test:caching # Prompt caching +npm run test:crossmodel # Cross-model thinking signatures ``` ## Architecture @@ -53,6 +60,7 @@ src/ ├── server.js # Express server ├── constants.js # Configuration values ├── errors.js # Custom error classes +├── fallback-config.js # Model fallback mappings and helpers │ ├── cloudcode/ # Cloud Code API client │ ├── index.js # Public API exports @@ -87,7 +95,7 @@ src/ │ ├── content-converter.js # Message content conversion │ ├── schema-sanitizer.js # JSON Schema cleaning for 
Gemini │ ├── thinking-utils.js # Thinking block validation/recovery -│ └── signature-cache.js # In-memory signature cache +│ └── signature-cache.js # Signature cache (tool_use + thinking signatures) │ └── utils/ # Utilities ├── helpers.js # formatDuration, sleep @@ -101,7 +109,8 @@ src/ - **src/account-manager/**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown - **src/auth/**: Authentication including Google OAuth, token extraction, and database access - **src/format/**: Format conversion between Anthropic and Google Generative AI formats -- **src/constants.js**: API endpoints, model mappings, OAuth config, and all configuration values +- **src/constants.js**: API endpoints, model mappings, fallback config, OAuth config, and all configuration values +- **src/fallback-config.js**: Model fallback mappings (`getFallbackModel()`, `hasFallback()`) - **src/errors.js**: Custom error classes (`RateLimitError`, `AuthError`, `ApiError`, etc.) **Multi-Account Load Balancing:** @@ -117,6 +126,22 @@ src/ - `cache_read_input_tokens` returned in usage metadata when cache hits - Token calculation: `input_tokens = promptTokenCount - cachedContentTokenCount` +**Model Fallback (--fallback flag):** +- When all accounts are exhausted for a model, automatically falls back to an alternate model +- Fallback mappings defined in `MODEL_FALLBACK_MAP` in `src/constants.js` +- Thinking models fall back to thinking models (e.g., `claude-sonnet-4-5-thinking` → `gemini-3-flash`) +- Fallback is disabled on recursive calls to prevent infinite chains +- Enable with `npm start -- --fallback` or `FALLBACK=true` environment variable + +**Cross-Model Thinking Signatures:** +- Claude and Gemini use incompatible thinking signatures +- When switching models mid-conversation, incompatible signatures are detected and dropped +- Signature cache tracks model family ('claude' or 'gemini') for each signature +- `hasGeminiHistory()` detects Gemini→Claude cross-model 
scenarios +- Thinking recovery (`closeToolLoopForThinking()`) injects synthetic messages to close interrupted tool loops +- For Gemini targets: strict validation - drops unknown or mismatched signatures +- For Claude targets: lenient - lets Claude validate its own signatures + ## Testing Notes - Tests require the server to be running (`npm start` in separate terminal) @@ -129,6 +154,7 @@ src/ **Constants:** All configuration values are centralized in `src/constants.js`: - API endpoints and headers - Model mappings and model family detection (`getModelFamily()`, `isThinkingModel()`) +- Model fallback mappings (`MODEL_FALLBACK_MAP`) - OAuth configuration - Rate limit thresholds - Thinking model settings