From df6625b531d0ff0816e7930af689f831dfe70c7c Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 18:01:21 +0530
Subject: [PATCH 01/10] fallback changes from PR #35

---
 src/cloudcode/message-handler.js   | 13 ++++++++++-
 src/cloudcode/streaming-handler.js | 14 +++++++++++-
 src/fallback-config.js             | 36 ++++++++++++++++++++++++++++++
 src/index.js                       | 22 +++++++++++++++---
 src/server.js                      |  8 +++++--
 5 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 src/fallback-config.js
diff --git a/src/cloudcode/message-handler.js b/src/cloudcode/message-handler.js
index 4491afc..beb6745 100644
--- a/src/cloudcode/message-handler.js
+++ b/src/cloudcode/message-handler.js
@@ -18,6 +18,7 @@ import { logger } from '../utils/logger.js';
 import { parseResetTime } from './rate-limit-parser.js';
 import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
 import { parseThinkingSSEResponse } from './sse-parser.js';
+import { getFallbackModel } from '../fallback-config.js';
 
 /**
  * Send a non-streaming request to Cloud Code with multi-account support
@@ -32,7 +33,7 @@ import { parseThinkingSSEResponse } from './sse-parser.js';
  * @returns {Promise<Object>} Anthropic-format response object
  * @throws {Error} If max retries exceeded or no accounts available
  */
-export async function sendMessage(anthropicRequest, accountManager) {
+export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) {
     const model = anthropicRequest.model;
     const isThinking = isThinkingModel(model);
 
@@ -76,6 +77,16 @@ export async function sendMessage(anthropicRequest, accountManager) {
             }
 
             if (!account) {
+                // Check if fallback is enabled and available
+                if (fallbackEnabled) {
+                    const fallbackModel = getFallbackModel(model);
+                    if (fallbackModel) {
+                        logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
+                        // Retry with fallback model
+                        const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                        return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
+                    }
+                }
                 throw new Error('No accounts available');
             }
         }
diff --git a/src/cloudcode/streaming-handler.js b/src/cloudcode/streaming-handler.js
index f3af687..f33945b 100644
--- a/src/cloudcode/streaming-handler.js
+++ b/src/cloudcode/streaming-handler.js
@@ -16,6 +16,7 @@ import { logger } from '../utils/logger.js';
 import { parseResetTime } from './rate-limit-parser.js';
 import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
 import { streamSSEResponse } from './sse-streamer.js';
+import { getFallbackModel } from '../fallback-config.js';
 
 
 /**
@@ -31,7 +32,7 @@ import { streamSSEResponse } from './sse-streamer.js';
  * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
  * @throws {Error} If max retries exceeded or no accounts available
  */
-export async function* sendMessageStream(anthropicRequest, accountManager) {
+export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) {
     const model = anthropicRequest.model;
 
     // Retry loop with account failover
@@ -74,6 +75,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager) {
             }
 
             if (!account) {
+                // Check if fallback is enabled and available
+                if (fallbackEnabled) {
+                    const fallbackModel = getFallbackModel(model);
+                    if (fallbackModel) {
+                        logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
+                        // Retry with fallback model
+                        const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                        yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
+                        return;
+                    }
+                }
                 throw new Error('No accounts available');
             }
         }
diff --git a/src/fallback-config.js b/src/fallback-config.js
new file mode 100644
index 0000000..880e5ac
--- /dev/null
+++ b/src/fallback-config.js
@@ -0,0 +1,36 @@
+/**
+ * Model Fallback Configuration
+ * 
+ * Defines fallback mappings for when a model's quota is exhausted across all accounts.
+ * Enables graceful degradation to alternative models with similar capabilities.
+ */
+
+/**
+ * Model fallback mapping
+ * Maps primary model ID to fallback model ID
+ */
+export const MODEL_FALLBACK_MAP = {
+    'gemini-3-pro-high': 'claude-sonnet-4-5-thinking',
+    'gemini-3-pro-low': 'claude-sonnet-4-5',
+    'claude-opus-4-5-thinking': 'gemini-3-pro-high',
+    'claude-sonnet-4-5-thinking': 'gemini-3-pro-high',
+    'claude-sonnet-4-5': 'gemini-3-pro-low'
+};
+
+/**
+ * Get fallback model for a given model ID
+ * @param {string} model - Primary model ID
+ * @returns {string|null} Fallback model ID or null if no fallback exists
+ */
+export function getFallbackModel(model) {
+    return MODEL_FALLBACK_MAP[model] || null;
+}
+
+/**
+ * Check if a model has a fallback configured
+ * @param {string} model - Model ID to check
+ * @returns {boolean} True if fallback exists
+ */
+export function hasFallback(model) {
+    return model in MODEL_FALLBACK_MAP;
+}
diff --git a/src/index.js b/src/index.js
index 3c71759..b439884 100644
--- a/src/index.js
+++ b/src/index.js
@@ -12,6 +12,7 @@ import os from 'os';
 // Parse command line arguments
 const args = process.argv.slice(2);
 const isDebug = args.includes('--debug') || process.env.DEBUG === 'true';
+const isFallbackEnabled = args.includes('--fallback') || process.env.FALLBACK === 'true';
 
 // Initialize logger
 logger.setDebug(isDebug);
@@ -20,6 +21,13 @@ if (isDebug) {
     logger.debug('Debug mode enabled');
 }
 
+if (isFallbackEnabled) {
+    logger.info('Model fallback mode enabled');
+}
+
+// Export fallback flag for server to use
+export const FALLBACK_ENABLED = isFallbackEnabled;
+
 const PORT = process.env.PORT || DEFAULT_PORT;
 
 // Home directory for account storage
@@ -40,14 +48,22 @@ app.listen(PORT, () => {
     if (!isDebug) {
         controlSection += '║    --debug            Enable debug logging                   ║\n';
     }
+    if (!isFallbackEnabled) {
+        controlSection += '║    --fallback         Enable model fallback on quota exhaust ║\n';
+    }
     controlSection += '║    Ctrl+C             Stop server                            ║';
 
-    // Build status section if debug mode is active
+    // Build status section if any modes are active
     let statusSection = '';
-    if (isDebug) {
+    if (isDebug || isFallbackEnabled) {
         statusSection = '║                                                              ║\n';
         statusSection += '║  Active Modes:                                               ║\n';
-        statusSection += '║    ✓ Debug mode enabled                                      ║\n';
+        if (isDebug) {
+            statusSection += '║    ✓ Debug mode enabled                                      ║\n';
+        }
+        if (isFallbackEnabled) {
+            statusSection += '║    ✓ Model fallback enabled                                  ║\n';
+        }
     }
 
     logger.log(`
diff --git a/src/server.js b/src/server.js
index f0ccb5d..587b393 100644
--- a/src/server.js
+++ b/src/server.js
@@ -13,6 +13,10 @@ import { AccountManager } from './account-manager/index.js';
 import { formatDuration } from './utils/helpers.js';
 import { logger } from './utils/logger.js';
 
+// Parse fallback flag directly from command line args to avoid circular dependency
+const args = process.argv.slice(2);
+const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true';
+
 const app = express();
 
 // Initialize account manager (will be fully initialized on first request or startup)
@@ -595,7 +599,7 @@ app.post('/v1/messages', async (req, res) => {
 
             try {
                 // Use the streaming generator with account manager
-                for await (const event of sendMessageStream(request, accountManager)) {
+                for await (const event of sendMessageStream(request, accountManager, FALLBACK_ENABLED)) {
                     res.write(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`);
                     // Flush after each event for real-time streaming
                     if (res.flush) res.flush();
@@ -616,7 +620,7 @@ app.post('/v1/messages', async (req, res) => {
 
         } else {
             // Handle non-streaming response
-            const response = await sendMessage(request, accountManager);
+            const response = await sendMessage(request, accountManager, FALLBACK_ENABLED);
             res.json(response);
         }
 

From ac9ec6b3584a2aa4aaf91695eeb867fdab076da3 Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 22:01:57 +0530
Subject: [PATCH 02/10] Signature handling for fallback

---
 package.json                        |   3 +-
 src/cloudcode/sse-streamer.js       |   7 +-
 src/format/content-converter.js     |  23 +-
 src/format/request-converter.js     |  14 +-
 src/format/response-converter.js    |  10 +-
 src/format/signature-cache.js       |  51 ++-
 src/format/thinking-utils.js        |  23 +-
 tests/test-cross-model-thinking.cjs | 511 ++++++++++++++++++++++++++++
 8 files changed, 618 insertions(+), 24 deletions(-)
 create mode 100644 tests/test-cross-model-thinking.cjs

diff --git a/package.json b/package.json
index 815c9a7..365945e 100644
--- a/package.json
+++ b/package.json
@@ -25,7 +25,8 @@
     "test:streaming": "node tests/test-multiturn-thinking-tools-streaming.cjs",
     "test:interleaved": "node tests/test-interleaved-thinking.cjs",
     "test:images": "node tests/test-images.cjs",
-    "test:caching": "node tests/test-caching-streaming.cjs"
+    "test:caching": "node tests/test-caching-streaming.cjs",
+    "test:crossmodel": "node tests/test-cross-model-thinking.cjs"
   },
   "keywords": [
     "claude",
diff --git a/src/cloudcode/sse-streamer.js b/src/cloudcode/sse-streamer.js
index 8c8974e..eaf9136 100644
--- a/src/cloudcode/sse-streamer.js
+++ b/src/cloudcode/sse-streamer.js
@@ -6,8 +6,8 @@
  */
 
 import crypto from 'crypto';
-import { MIN_SIGNATURE_LENGTH } from '../constants.js';
-import { cacheSignature } from '../format/signature-cache.js';
+import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
+import { cacheSignature, cacheThinkingSignature } from '../format/signature-cache.js';
 import { logger } from '../utils/logger.js';
 
 /**
@@ -110,6 +110,9 @@ export async function* streamSSEResponse(response, originalModel) {
 
                         if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
                             currentThinkingSignature = signature;
+                            // Cache thinking signature with model family for cross-model compatibility
+                            const modelFamily = getModelFamily(originalModel);
+                            cacheThinkingSignature(signature, modelFamily);
                         }
 
                         yield {
diff --git a/src/format/content-converter.js b/src/format/content-converter.js
index 052eb0a..8599847 100644
--- a/src/format/content-converter.js
+++ b/src/format/content-converter.js
@@ -4,7 +4,7 @@
  */
 
 import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
-import { getCachedSignature } from './signature-cache.js';
+import { getCachedSignature, getCachedSignatureFamily } from './signature-cache.js';
 import { logger } from '../utils/logger.js';
 
 /**
@@ -155,16 +155,31 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
             // Add any images from the tool result as separate parts
             parts.push(...imageParts);
         } else if (block.type === 'thinking') {
-            // Handle thinking blocks - only those with valid signatures
+            // Handle thinking blocks with signature compatibility check
             if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
-                // Convert to Gemini format with signature
+                const signatureFamily = getCachedSignatureFamily(block.signature);
+                const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
+
+                // Drop blocks with incompatible signatures for Gemini (cross-model switch)
+                if (isGeminiModel && signatureFamily && targetFamily && signatureFamily !== targetFamily) {
+                    logger.debug(`[ContentConverter] Dropping incompatible ${signatureFamily} thinking for ${targetFamily} model`);
+                    continue;
+                }
+
+                // Drop blocks with unknown signature origin for Gemini (cold cache - safe default)
+                if (isGeminiModel && !signatureFamily && targetFamily) {
+                    logger.debug(`[ContentConverter] Dropping thinking with unknown signature origin`);
+                    continue;
+                }
+
+                // Compatible - convert to Gemini format with signature
                 parts.push({
                     text: block.thinking,
                     thought: true,
                     thoughtSignature: block.signature
                 });
             }
-            // Unsigned thinking blocks are dropped upstream
+            // Unsigned thinking blocks are dropped (existing behavior)
         }
     }
 
diff --git a/src/format/request-converter.js b/src/format/request-converter.js
index 17e67ed..7343439 100644
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -77,12 +77,14 @@ export function convertAnthropicToGoogle(anthropicRequest) {
         }
     }
 
-    // Apply thinking recovery for Gemini thinking models when needed
-    // This handles corrupted tool loops where thinking blocks are stripped
-    // Claude models handle this differently and don't need this recovery
+    // Apply thinking recovery for thinking models when needed
+    // - Gemini: needs recovery for tool loops/interrupted tools (stripped thinking)
+    // - Claude: needs recovery ONLY when cross-model (incompatible Gemini signatures will be dropped)
     let processedMessages = messages;
-    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
-        logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
+    const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
+
+    if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) {
+        logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`);
         processedMessages = closeToolLoopForThinking(messages);
     }
 
@@ -107,7 +109,7 @@ export function convertAnthropicToGoogle(anthropicRequest) {
         // This happens when all thinking blocks are filtered out (unsigned)
         if (parts.length === 0) {
             logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
-            parts.push({ text: '' });
+            parts.push({ text: '.' });
         }
 
         const content = {
diff --git a/src/format/response-converter.js b/src/format/response-converter.js
index 59b919b..c58d57c 100644
--- a/src/format/response-converter.js
+++ b/src/format/response-converter.js
@@ -4,8 +4,8 @@
  */
 
 import crypto from 'crypto';
-import { MIN_SIGNATURE_LENGTH } from '../constants.js';
-import { cacheSignature } from './signature-cache.js';
+import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
+import { cacheSignature, cacheThinkingSignature } from './signature-cache.js';
 
 /**
  * Convert Google Generative AI response to Anthropic Messages API format
@@ -33,6 +33,12 @@ export function convertGoogleToAnthropic(googleResponse, model) {
             if (part.thought === true) {
                 const signature = part.thoughtSignature || '';
 
+                // Cache thinking signature with model family for cross-model compatibility
+                if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
+                    const modelFamily = getModelFamily(model);
+                    cacheThinkingSignature(signature, modelFamily);
+                }
+
                 // Include thinking blocks in the response for Claude Code
                 anthropicContent.push({
                     type: 'thinking',
diff --git a/src/format/signature-cache.js b/src/format/signature-cache.js
index 49154ee..944c821 100644
--- a/src/format/signature-cache.js
+++ b/src/format/signature-cache.js
@@ -5,11 +5,15 @@
  * Gemini models require thoughtSignature on tool calls, but Claude Code
  * strips non-standard fields. This cache stores signatures by tool_use_id
  * so they can be restored in subsequent requests.
+ *
+ * Also caches thinking block signatures with model family for cross-model
+ * compatibility checking.
  */
 
-import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
+import { GEMINI_SIGNATURE_CACHE_TTL_MS, MIN_SIGNATURE_LENGTH } from '../constants.js';
 
 const signatureCache = new Map();
+const thinkingSignatureCache = new Map();
 
 /**
  * Store a signature for a tool_use_id
@@ -54,6 +58,11 @@ export function cleanupCache() {
             signatureCache.delete(key);
         }
     }
+    for (const [key, entry] of thinkingSignatureCache) {
+        if (now - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
+            thinkingSignatureCache.delete(key);
+        }
+    }
 }
 
 /**
@@ -63,3 +72,43 @@ export function cleanupCache() {
 export function getCacheSize() {
     return signatureCache.size;
 }
+
+/**
+ * Cache a thinking block signature with its model family
+ * @param {string} signature - The thinking signature to cache
+ * @param {string} modelFamily - The model family ('claude' or 'gemini')
+ */
+export function cacheThinkingSignature(signature, modelFamily) {
+    if (!signature || signature.length < MIN_SIGNATURE_LENGTH) return;
+    thinkingSignatureCache.set(signature, {
+        modelFamily,
+        timestamp: Date.now()
+    });
+}
+
+/**
+ * Get the cached model family for a thinking signature
+ * @param {string} signature - The signature to look up
+ * @returns {string|null} 'claude', 'gemini', or null if not found/expired
+ */
+export function getCachedSignatureFamily(signature) {
+    if (!signature) return null;
+    const entry = thinkingSignatureCache.get(signature);
+    if (!entry) return null;
+
+    // Check TTL
+    if (Date.now() - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
+        thinkingSignatureCache.delete(signature);
+        return null;
+    }
+
+    return entry.modelFamily;
+}
+
+/**
+ * Get the current thinking signature cache size (for debugging)
+ * @returns {number} Number of entries in the thinking signature cache
+ */
+export function getThinkingCacheSize() {
+    return thinkingSignatureCache.size;
+}
diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 7fca77b..9358311 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -386,16 +386,23 @@ export function analyzeConversationState(messages) {
 
 /**
  * Check if conversation needs thinking recovery.
- * Returns true when:
- * 1. We're in a tool loop but have no valid thinking blocks, OR
- * 2. We have an interrupted tool with no valid thinking blocks
+ *
+ * For Gemini: recovery needed when (tool loop OR interrupted tool) AND no valid thinking
+ * For Claude: recovery needed when no valid compatible thinking (cross-model detection)
  *
  * @param {Array<Object>} messages - Array of messages
+ * @param {string} targetFamily - Target model family ('claude' or 'gemini')
  * @returns {boolean} True if thinking recovery is needed
  */
-export function needsThinkingRecovery(messages) {
-    const state = analyzeConversationState(messages);
-    // Need recovery if (tool loop OR interrupted tool) AND no thinking
+export function needsThinkingRecovery(messages, targetFamily = null) {
+    const state = analyzeConversationState(messages, targetFamily);
+
+    if (targetFamily === 'claude') {
+        // Claude: only check if thinking is valid/compatible
+        return !state.turnHasThinking;
+    }
+
+    // Gemini (default): check tool loop/interrupted AND no thinking
     return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking;
 }
 
@@ -414,9 +421,9 @@ function stripAllThinkingBlocks(messages) {
         const filtered = content.filter(block => !isThinkingPart(block));
 
         if (msg.content) {
-            return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '' }] };
+            return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
         } else if (msg.parts) {
-            return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '' }] };
+            return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '.' }] };
         }
         return msg;
     });
diff --git a/tests/test-cross-model-thinking.cjs b/tests/test-cross-model-thinking.cjs
new file mode 100644
index 0000000..b7f6704
--- /dev/null
+++ b/tests/test-cross-model-thinking.cjs
@@ -0,0 +1,511 @@
+/**
+ * Cross-Model Thinking Signature Test
+ *
+ * Tests that switching between Claude and Gemini models mid-conversation
+ * properly handles incompatible thinking signatures.
+ *
+ * Scenarios tested:
+ * 1. Claude → Gemini: Claude thinking signatures should be dropped
+ * 2. Gemini → Claude: Gemini thinking signatures should be dropped
+ * 3. Both should still work without errors (thinking recovery kicks in)
+ */
+const { streamRequest, nonStreamRequest, analyzeContent, commonTools } = require('./helpers/http-client.cjs');
+const { getModelConfig } = require('./helpers/test-models.cjs');
+
+const tools = [commonTools.executeCommand];
+
+// Test models
+const CLAUDE_MODEL = 'claude-sonnet-4-5-thinking';
+const GEMINI_MODEL = 'gemini-3-flash';
+
+async function testClaudeToGemini() {
+    console.log('='.repeat(60));
+    console.log('TEST: Claude → Gemini Cross-Model Switch');
+    console.log('Simulates starting with Claude, then switching to Gemini');
+    console.log('='.repeat(60));
+    console.log('');
+
+    const claudeConfig = getModelConfig('claude');
+    const geminiConfig = getModelConfig('gemini');
+
+    // TURN 1: Get response from Claude with thinking + tool use
+    console.log('TURN 1: Request to Claude (get thinking signature)');
+    console.log('-'.repeat(40));
+
+    const turn1Messages = [
+        { role: 'user', content: 'Run the command "ls -la" to list files.' }
+    ];
+
+    const turn1Result = await streamRequest({
+        model: CLAUDE_MODEL,
+        max_tokens: claudeConfig.max_tokens,
+        stream: true,
+        tools,
+        thinking: claudeConfig.thinking,
+        messages: turn1Messages
+    });
+
+    const turn1Content = analyzeContent(turn1Result.content);
+    console.log(`  Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
+    console.log(`  Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
+    console.log(`  Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
+
+    if (!turn1Content.hasToolUse) {
+        console.log('  SKIP: No tool use in turn 1');
+        return { passed: false, skipped: true };
+    }
+
+    // Extract thinking and tool_use for the assistant message
+    const assistantContent = [];
+    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
+        assistantContent.push({
+            type: 'thinking',
+            thinking: turn1Content.thinking[0].thinking,
+            signature: turn1Content.thinking[0].signature || ''
+        });
+    }
+    if (turn1Content.hasText && turn1Content.text[0]) {
+        assistantContent.push({
+            type: 'text',
+            text: turn1Content.text[0].text
+        });
+    }
+    for (const tool of turn1Content.toolUse) {
+        assistantContent.push({
+            type: 'tool_use',
+            id: tool.id,
+            name: tool.name,
+            input: tool.input
+        });
+    }
+
+    const signatureLength = turn1Content.thinking[0]?.signature?.length || 0;
+    console.log(`  Claude signature length: ${signatureLength}`);
+
+    // TURN 2: Switch to Gemini with Claude's thinking signature in history
+    console.log('\nTURN 2: Request to Gemini (with Claude thinking in history)');
+    console.log('-'.repeat(40));
+
+    const turn2Messages = [
+        { role: 'user', content: 'Run the command "ls -la" to list files.' },
+        { role: 'assistant', content: assistantContent },
+        {
+            role: 'user',
+            content: [{
+                type: 'tool_result',
+                tool_use_id: turn1Content.toolUse[0].id,
+                content: 'total 16\ndrwxr-xr-x  5 user staff  160 Jan  1 12:00 .\ndrwxr-xr-x  3 user staff   96 Jan  1 12:00 ..\n-rw-r--r--  1 user staff  100 Jan  1 12:00 file.txt'
+            }]
+        }
+    ];
+
+    try {
+        const turn2Result = await streamRequest({
+            model: GEMINI_MODEL,
+            max_tokens: geminiConfig.max_tokens,
+            stream: true,
+            tools,
+            thinking: geminiConfig.thinking,
+            messages: turn2Messages
+        });
+
+        const turn2Content = analyzeContent(turn2Result.content);
+        console.log(`  Response received: YES`);
+        console.log(`  Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
+        console.log(`  Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
+        console.log(`  Error: NO`);
+
+        // Success if we got any response without error
+        const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
+        console.log(`  Result: ${passed ? 'PASS' : 'FAIL'}`);
+        return { passed };
+    } catch (error) {
+        console.log(`  Error: ${error.message}`);
+        console.log(`  Result: FAIL`);
+        return { passed: false, error: error.message };
+    }
+}
+
+async function testGeminiToClaude() {
+    console.log('\n' + '='.repeat(60));
+    console.log('TEST: Gemini → Claude Cross-Model Switch');
+    console.log('Simulates starting with Gemini, then switching to Claude');
+    console.log('='.repeat(60));
+    console.log('');
+
+    const claudeConfig = getModelConfig('claude');
+    const geminiConfig = getModelConfig('gemini');
+
+    // TURN 1: Get response from Gemini with thinking + tool use
+    console.log('TURN 1: Request to Gemini (get thinking signature)');
+    console.log('-'.repeat(40));
+
+    const turn1Messages = [
+        { role: 'user', content: 'Run the command "pwd" to show current directory.' }
+    ];
+
+    const turn1Result = await streamRequest({
+        model: GEMINI_MODEL,
+        max_tokens: geminiConfig.max_tokens,
+        stream: true,
+        tools,
+        thinking: geminiConfig.thinking,
+        messages: turn1Messages
+    });
+
+    const turn1Content = analyzeContent(turn1Result.content);
+    console.log(`  Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
+    console.log(`  Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
+    console.log(`  Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
+
+    if (!turn1Content.hasToolUse) {
+        console.log('  SKIP: No tool use in turn 1');
+        return { passed: false, skipped: true };
+    }
+
+    // Extract content for the assistant message
+    const assistantContent = [];
+    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
+        assistantContent.push({
+            type: 'thinking',
+            thinking: turn1Content.thinking[0].thinking,
+            signature: turn1Content.thinking[0].signature || ''
+        });
+    }
+    if (turn1Content.hasText && turn1Content.text[0]) {
+        assistantContent.push({
+            type: 'text',
+            text: turn1Content.text[0].text
+        });
+    }
+    for (const tool of turn1Content.toolUse) {
+        const toolBlock = {
+            type: 'tool_use',
+            id: tool.id,
+            name: tool.name,
+            input: tool.input
+        };
+        // Include thoughtSignature if present (Gemini puts it on tool_use)
+        if (tool.thoughtSignature) {
+            toolBlock.thoughtSignature = tool.thoughtSignature;
+        }
+        assistantContent.push(toolBlock);
+    }
+
+    const thinkingSigLength = turn1Content.thinking[0]?.signature?.length || 0;
+    const toolUseSigLength = turn1Content.toolUse[0]?.thoughtSignature?.length || 0;
+    console.log(`  Gemini thinking signature length: ${thinkingSigLength}`);
+    console.log(`  Gemini tool_use signature length: ${toolUseSigLength}`);
+
+    // TURN 2: Switch to Claude with Gemini's thinking signature in history
+    console.log('\nTURN 2: Request to Claude (with Gemini thinking in history)');
+    console.log('-'.repeat(40));
+    console.log(`  Assistant content being sent: ${JSON.stringify(assistantContent).substring(0, 400)}`);
+
+    const turn2Messages = [
+        { role: 'user', content: 'Run the command "pwd" to show current directory.' },
+        { role: 'assistant', content: assistantContent },
+        {
+            role: 'user',
+            content: [{
+                type: 'tool_result',
+                tool_use_id: turn1Content.toolUse[0].id,
+                content: '/home/user/projects'
+            }]
+        }
+    ];
+
+    try {
+        const turn2Result = await streamRequest({
+            model: CLAUDE_MODEL,
+            max_tokens: claudeConfig.max_tokens,
+            stream: true,
+            tools,
+            thinking: claudeConfig.thinking,
+            messages: turn2Messages
+        });
+
+        const turn2Content = analyzeContent(turn2Result.content);
+        console.log(`  Response received: YES`);
+        console.log(`  Stop reason: ${turn2Result.stop_reason}`);
+        console.log(`  Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
+        console.log(`  Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
+        console.log(`  Tool Use: ${turn2Content.hasToolUse ? 'YES' : 'NO'}`);
+        console.log(`  Raw content: ${JSON.stringify(turn2Result.content).substring(0, 300)}`);
+        console.log(`  Error: NO`);
+
+        // Success if we got any response without error
+        const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
+        console.log(`  Result: ${passed ? 'PASS' : 'FAIL'}`);
+        return { passed };
+    } catch (error) {
+        console.log(`  Error: ${error.message}`);
+        console.log(`  Result: FAIL`);
+        return { passed: false, error: error.message };
+    }
+}
+
+async function testSameModelContinuation() {
+    console.log('\n' + '='.repeat(60));
+    console.log('TEST: Same Model Continuation - Claude (Control Test)');
+    console.log('Verifies same-model multi-turn still works');
+    console.log('='.repeat(60));
+    console.log('');
+
+    const claudeConfig = getModelConfig('claude');
+
+    // TURN 1: Get response from Claude
+    console.log('TURN 1: Request to Claude');
+    console.log('-'.repeat(40));
+
+    const turn1Messages = [
+        { role: 'user', content: 'Run "echo hello" command.' }
+    ];
+
+    const turn1Result = await streamRequest({
+        model: CLAUDE_MODEL,
+        max_tokens: claudeConfig.max_tokens,
+        stream: true,
+        tools,
+        thinking: claudeConfig.thinking,
+        messages: turn1Messages
+    });
+
+    const turn1Content = analyzeContent(turn1Result.content);
+    console.log(`  Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
+    console.log(`  Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
+    console.log(`  Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
+
+    if (!turn1Content.hasToolUse) {
+        console.log('  SKIP: No tool use in turn 1');
+        return { passed: false, skipped: true };
+    }
+
+    // Build assistant message
+    const assistantContent = [];
+    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
+        assistantContent.push({
+            type: 'thinking',
+            thinking: turn1Content.thinking[0].thinking,
+            signature: turn1Content.thinking[0].signature || ''
+        });
+    }
+    if (turn1Content.hasText && turn1Content.text[0]) {
+        assistantContent.push({
+            type: 'text',
+            text: turn1Content.text[0].text
+        });
+    }
+    for (const tool of turn1Content.toolUse) {
+        assistantContent.push({
+            type: 'tool_use',
+            id: tool.id,
+            name: tool.name,
+            input: tool.input
+        });
+    }
+
+    // TURN 2: Continue with same model
+    console.log('\nTURN 2: Continue with Claude (same model)');
+    console.log('-'.repeat(40));
+
+    const turn2Messages = [
+        { role: 'user', content: 'Run "echo hello" command.' },
+        { role: 'assistant', content: assistantContent },
+        {
+            role: 'user',
+            content: [{
+                type: 'tool_result',
+                tool_use_id: turn1Content.toolUse[0].id,
+                content: 'hello'
+            }]
+        }
+    ];
+
+    try {
+        const turn2Result = await streamRequest({
+            model: CLAUDE_MODEL,
+            max_tokens: claudeConfig.max_tokens,
+            stream: true,
+            tools,
+            thinking: claudeConfig.thinking,
+            messages: turn2Messages
+        });
+
+        const turn2Content = analyzeContent(turn2Result.content);
+        console.log(`  Response received: YES`);
+        console.log(`  Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
+        console.log(`  Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`);
+        console.log(`  Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
+        console.log(`  Error: NO`);
+
+        // For same model, we should preserve thinking with valid signature
+        const passed = turn2Content.hasText || turn2Content.hasThinking;
+        console.log(`  Result: ${passed ? 'PASS' : 'FAIL'}`);
+        return { passed };
+    } catch (error) {
+        console.log(`  Error: ${error.message}`);
+        console.log(`  Result: FAIL`);
+        return { passed: false, error: error.message };
+    }
+}
+
+async function testSameModelContinuationGemini() {
+    console.log('\n' + '='.repeat(60));
+    console.log('TEST: Same Model Continuation - Gemini (Control Test)');
+    console.log('Verifies same-model multi-turn still works for Gemini');
+    console.log('='.repeat(60));
+    console.log('');
+
+    const geminiConfig = getModelConfig('gemini');
+
+    // TURN 1: Get response from Gemini
+    console.log('TURN 1: Request to Gemini');
+    console.log('-'.repeat(40));
+
+    const turn1Messages = [
+        { role: 'user', content: 'Run "echo world" command.' }
+    ];
+
+    const turn1Result = await streamRequest({
+        model: GEMINI_MODEL,
+        max_tokens: geminiConfig.max_tokens,
+        stream: true,
+        tools,
+        thinking: geminiConfig.thinking,
+        messages: turn1Messages
+    });
+
+    const turn1Content = analyzeContent(turn1Result.content);
+    console.log(`  Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
+    console.log(`  Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
+    console.log(`  Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
+
+    if (!turn1Content.hasToolUse) {
+        console.log('  SKIP: No tool use in turn 1');
+        return { passed: false, skipped: true };
+    }
+
+    // Build assistant message
+    const assistantContent = [];
+    if (turn1Content.hasThinking && turn1Content.thinking[0]) {
+        assistantContent.push({
+            type: 'thinking',
+            thinking: turn1Content.thinking[0].thinking,
+            signature: turn1Content.thinking[0].signature || ''
+        });
+    }
+    if (turn1Content.hasText && turn1Content.text[0]) {
+        assistantContent.push({
+            type: 'text',
+            text: turn1Content.text[0].text
+        });
+    }
+    for (const tool of turn1Content.toolUse) {
+        const toolBlock = {
+            type: 'tool_use',
+            id: tool.id,
+            name: tool.name,
+            input: tool.input
+        };
+        // Include thoughtSignature if present (Gemini puts it on tool_use)
+        if (tool.thoughtSignature) {
+            toolBlock.thoughtSignature = tool.thoughtSignature;
+        }
+        assistantContent.push(toolBlock);
+    }
+
+    // TURN 2: Continue with same model
+    console.log('\nTURN 2: Continue with Gemini (same model)');
+    console.log('-'.repeat(40));
+
+    const turn2Messages = [
+        { role: 'user', content: 'Run "echo world" command.' },
+        { role: 'assistant', content: assistantContent },
+        {
+            role: 'user',
+            content: [{
+                type: 'tool_result',
+                tool_use_id: turn1Content.toolUse[0].id,
+                content: 'world'
+            }]
+        }
+    ];
+
+    try {
+        const turn2Result = await streamRequest({
+            model: GEMINI_MODEL,
+            max_tokens: geminiConfig.max_tokens,
+            stream: true,
+            tools,
+            thinking: geminiConfig.thinking,
+            messages: turn2Messages
+        });
+
+        const turn2Content = analyzeContent(turn2Result.content);
+        console.log(`  Response received: YES`);
+        console.log(`  Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
+        console.log(`  Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`);
+        console.log(`  Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
+        console.log(`  Error: NO`);
+
+        // For same model, we should get a response
+        const passed = turn2Content.hasText || turn2Content.hasThinking;
+        console.log(`  Result: ${passed ? 'PASS' : 'FAIL'}`);
+        return { passed };
+    } catch (error) {
+        console.log(`  Error: ${error.message}`);
+        console.log(`  Result: FAIL`);
+        return { passed: false, error: error.message };
+    }
+}
+
+async function main() {
+    console.log('\n');
+    console.log('╔' + '═'.repeat(58) + '╗');
+    console.log('║' + '      CROSS-MODEL THINKING SIGNATURE TEST SUITE          '.padEnd(58) + '║');
+    console.log('║' + '      Tests switching between Claude and Gemini          '.padEnd(58) + '║');
+    console.log('╚' + '═'.repeat(58) + '╝');
+    console.log('\n');
+
+    const results = [];
+
+    // Test 1: Claude → Gemini
+    const claudeToGemini = await testClaudeToGemini();
+    results.push({ name: 'Claude → Gemini', ...claudeToGemini });
+
+    // Test 2: Gemini → Claude
+    const geminiToClaude = await testGeminiToClaude();
+    results.push({ name: 'Gemini → Claude', ...geminiToClaude });
+
+    // Test 3: Same model Claude (control)
+    const sameModelClaude = await testSameModelContinuation();
+    results.push({ name: 'Same Model (Claude → Claude)', ...sameModelClaude });
+
+    // Test 4: Same model Gemini (control)
+    const sameModelGemini = await testSameModelContinuationGemini();
+    results.push({ name: 'Same Model (Gemini → Gemini)', ...sameModelGemini });
+
+    // Summary
+    console.log('\n' + '='.repeat(60));
+    console.log('SUMMARY');
+    console.log('='.repeat(60));
+
+    let allPassed = true;
+    for (const result of results) {
+        const status = result.skipped ? 'SKIP' : (result.passed ? 'PASS' : 'FAIL');
+        console.log(`  [${status}] ${result.name}`);
+        if (!result.passed && !result.skipped) allPassed = false;
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log(`FINAL RESULT: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
+    console.log('='.repeat(60));
+
+    process.exit(allPassed ? 0 : 1);
+}
+
+main().catch(err => {
+    console.error('Test error:', err);
+    process.exit(1);
+});

From 602d6ca0f8f1dbdac1c769b47facfe6a83e7bd6f Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 22:05:16 +0530
Subject: [PATCH 03/10] move fallback map to constants

---
 src/constants.js       | 12 +++++++++++-
 src/fallback-config.js | 17 +++++------------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/constants.js b/src/constants.js
index d957923..7c4e75a 100644
--- a/src/constants.js
+++ b/src/constants.js
@@ -144,6 +144,15 @@ export const OAUTH_CONFIG = {
 };
 export const OAUTH_REDIRECT_URI = `http://localhost:${OAUTH_CONFIG.callbackPort}/oauth-callback`;
 
+// Model fallback mapping - maps primary model to fallback when quota exhausted
+export const MODEL_FALLBACK_MAP = {
+    'gemini-3-pro-high': 'claude-sonnet-4-5-thinking',
+    'gemini-3-pro-low': 'claude-sonnet-4-5',
+    'claude-opus-4-5-thinking': 'gemini-3-pro-high',
+    'claude-sonnet-4-5-thinking': 'gemini-3-pro-high',
+    'claude-sonnet-4-5': 'gemini-3-pro-low'
+};
+
 export default {
     ANTIGRAVITY_ENDPOINT_FALLBACKS,
     ANTIGRAVITY_HEADERS,
@@ -165,5 +174,6 @@ export default {
     getModelFamily,
     isThinkingModel,
     OAUTH_CONFIG,
-    OAUTH_REDIRECT_URI
+    OAUTH_REDIRECT_URI,
+    MODEL_FALLBACK_MAP
 };
diff --git a/src/fallback-config.js b/src/fallback-config.js
index 880e5ac..894cdee 100644
--- a/src/fallback-config.js
+++ b/src/fallback-config.js
@@ -1,21 +1,14 @@
 /**
  * Model Fallback Configuration
- * 
+ *
  * Defines fallback mappings for when a model's quota is exhausted across all accounts.
  * Enables graceful degradation to alternative models with similar capabilities.
  */
 
-/**
- * Model fallback mapping
- * Maps primary model ID to fallback model ID
- */
-export const MODEL_FALLBACK_MAP = {
-    'gemini-3-pro-high': 'claude-sonnet-4-5-thinking',
-    'gemini-3-pro-low': 'claude-sonnet-4-5',
-    'claude-opus-4-5-thinking': 'gemini-3-pro-high',
-    'claude-sonnet-4-5-thinking': 'gemini-3-pro-high',
-    'claude-sonnet-4-5': 'gemini-3-pro-low'
-};
+import { MODEL_FALLBACK_MAP } from './constants.js';
+
+// Re-export for convenience
+export { MODEL_FALLBACK_MAP };
 
 /**
  * Get fallback model for a given model ID

From b7286059eee5455553b37bcf654ad3b97c36fcc1 Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 22:13:35 +0530
Subject: [PATCH 04/10] remove targetFamily from analyzeConversationState

---
 src/format/thinking-utils.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 9358311..83fa902 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -395,7 +395,7 @@ export function analyzeConversationState(messages) {
  * @returns {boolean} True if thinking recovery is needed
  */
 export function needsThinkingRecovery(messages, targetFamily = null) {
-    const state = analyzeConversationState(messages, targetFamily);
+    const state = analyzeConversationState(messages);
 
     if (targetFamily === 'claude') {
         // Claude: only check if thinking is valid/compatible

From 668c7aef26f7b44034a0711dcd552c2bd9bb660d Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 22:38:46 +0530
Subject: [PATCH 05/10] correct els if condition for state.inToolLoop

---
 src/format/thinking-utils.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 83fa902..af9b3de 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -464,7 +464,7 @@ export function closeToolLoopForThinking(messages) {
         });
 
         logger.debug('[ThinkingUtils] Applied thinking recovery for interrupted tool');
-    } else {
+    } else if (state.inToolLoop) {
         // For tool loops: add synthetic messages to close the loop
         const syntheticText = state.toolResultCount === 1
             ? '[Tool execution completed.]'

From dc65499c49b561cf8affa01be0b03eee10fdac21 Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 23:17:38 +0530
Subject: [PATCH 06/10] Preserve valid thinking blocks during recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of stripping all thinking blocks during thinking recovery,
now only strips invalid or incompatible blocks. Uses signature cache
to validate family compatibility for cross-model fallback scenarios.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/format/request-converter.js |  2 +-
 src/format/thinking-utils.js    | 38 +++++++++++++++++++++++++++------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/format/request-converter.js b/src/format/request-converter.js
index 7343439..fa2d920 100644
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -85,7 +85,7 @@ export function convertAnthropicToGoogle(anthropicRequest) {
 
     if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) {
         logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`);
-        processedMessages = closeToolLoopForThinking(messages);
+        processedMessages = closeToolLoopForThinking(messages, targetFamily);
     }
 
     // Convert messages to contents, then filter unsigned thinking blocks
diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index af9b3de..9017a8c 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -4,6 +4,7 @@
  */
 
 import { MIN_SIGNATURE_LENGTH } from '../constants.js';
+import { getCachedSignatureFamily } from './signature-cache.js';
 import { logger } from '../utils/logger.js';
 
 /**
@@ -407,18 +408,40 @@ export function needsThinkingRecovery(messages, targetFamily = null) {
 }
 
 /**
- * Strip all thinking blocks from messages.
+ * Strip invalid or incompatible thinking blocks from messages.
  * Used before injecting synthetic messages for recovery.
+ * Keeps valid thinking blocks to preserve context from previous turns.
  *
  * @param {Array<Object>} messages - Array of messages
- * @returns {Array<Object>} Messages with all thinking blocks removed
+ * @param {string} targetFamily - Target model family ('claude' or 'gemini')
+ * @returns {Array<Object>} Messages with invalid thinking blocks removed
  */
-function stripAllThinkingBlocks(messages) {
+function stripInvalidThinkingBlocks(messages, targetFamily = null) {
     return messages.map(msg => {
         const content = msg.content || msg.parts;
         if (!Array.isArray(content)) return msg;
 
-        const filtered = content.filter(block => !isThinkingPart(block));
+        const filtered = content.filter(block => {
+            // Keep non-thinking blocks
+            if (!isThinkingPart(block)) return true;
+
+            // Check generic validity (has signature of sufficient length)
+            if (!hasValidSignature(block)) return false;
+
+            // Check family compatibility if targetFamily is provided
+            if (targetFamily) {
+                const signature = block.thought === true ? block.thoughtSignature : block.signature;
+                const signatureFamily = getCachedSignatureFamily(signature);
+
+                // Strict validation: If we don't know the family (cache miss) or it doesn't match,
+                // we drop it. We don't assume validity for unknown signatures.
+                if (signatureFamily !== targetFamily) {
+                    return false;
+                }
+            }
+
+            return true;
+        });
 
         if (msg.content) {
             return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
@@ -439,16 +462,17 @@ function stripAllThinkingBlocks(messages) {
  * loop and allow the model to continue.
  *
  * @param {Array<Object>} messages - Array of messages
+ * @param {string} targetFamily - Target model family ('claude' or 'gemini')
  * @returns {Array<Object>} Modified messages with synthetic messages injected
  */
-export function closeToolLoopForThinking(messages) {
+export function closeToolLoopForThinking(messages, targetFamily = null) {
     const state = analyzeConversationState(messages);
 
     // Handle neither tool loop nor interrupted tool
     if (!state.inToolLoop && !state.interruptedTool) return messages;
 
-    // Strip all thinking blocks
-    let modified = stripAllThinkingBlocks(messages);
+    // Strip only invalid/incompatible thinking blocks (keep valid ones)
+    let modified = stripInvalidThinkingBlocks(messages, targetFamily);
 
     if (state.interruptedTool) {
         // For interrupted tools: just strip thinking and add a synthetic assistant message

From 12e427e9d5b7b6ffc8e992caced7e9f9833ebe2f Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 23:24:52 +0530
Subject: [PATCH 07/10] Fix needsThinkingRecovery to require tool loop context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/format/request-converter.js |  2 +-
 src/format/thinking-utils.js    | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/format/request-converter.js b/src/format/request-converter.js
index fa2d920..98a378c 100644
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -83,7 +83,7 @@ export function convertAnthropicToGoogle(anthropicRequest) {
     let processedMessages = messages;
     const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
 
-    if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) {
+    if (isThinking && targetFamily && needsThinkingRecovery(messages)) {
         logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`);
         processedMessages = closeToolLoopForThinking(messages, targetFamily);
     }
diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 9017a8c..3dcf883 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -388,23 +388,24 @@ export function analyzeConversationState(messages) {
 /**
  * Check if conversation needs thinking recovery.
  *
- * For Gemini: recovery needed when (tool loop OR interrupted tool) AND no valid thinking
- * For Claude: recovery needed when no valid compatible thinking (cross-model detection)
+ * Recovery is only needed when:
+ * 1. We're in a tool loop or have an interrupted tool, AND
+ * 2. No valid thinking blocks exist in the current turn
+ *
+ * Cross-model signature compatibility is handled by stripInvalidThinkingBlocks
+ * during recovery (not here).
  *
  * @param {Array<Object>} messages - Array of messages
- * @param {string} targetFamily - Target model family ('claude' or 'gemini')
  * @returns {boolean} True if thinking recovery is needed
  */
-export function needsThinkingRecovery(messages, targetFamily = null) {
+export function needsThinkingRecovery(messages) {
     const state = analyzeConversationState(messages);
 
-    if (targetFamily === 'claude') {
-        // Claude: only check if thinking is valid/compatible
-        return !state.turnHasThinking;
-    }
+    // Recovery is only needed in tool loops or interrupted tools
+    if (!state.inToolLoop && !state.interruptedTool) return false;
 
-    // Gemini (default): check tool loop/interrupted AND no thinking
-    return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking;
+    // Need recovery if no valid thinking blocks exist
+    return !state.turnHasThinking;
 }
 
 /**

From 53f8d7f6cc626b43efd2de41af00941137eac5a2 Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sat, 3 Jan 2026 23:29:21 +0530
Subject: [PATCH 08/10] Add debug logging when stripping thinking blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/format/thinking-utils.js | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 3dcf883..06bca85 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -418,7 +418,9 @@ export function needsThinkingRecovery(messages) {
  * @returns {Array<Object>} Messages with invalid thinking blocks removed
  */
 function stripInvalidThinkingBlocks(messages, targetFamily = null) {
-    return messages.map(msg => {
+    let strippedCount = 0;
+
+    const result = messages.map(msg => {
         const content = msg.content || msg.parts;
         if (!Array.isArray(content)) return msg;
 
@@ -427,7 +429,10 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
             if (!isThinkingPart(block)) return true;
 
             // Check generic validity (has signature of sufficient length)
-            if (!hasValidSignature(block)) return false;
+            if (!hasValidSignature(block)) {
+                strippedCount++;
+                return false;
+            }
 
             // Check family compatibility if targetFamily is provided
             if (targetFamily) {
@@ -437,6 +442,7 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
                 // Strict validation: If we don't know the family (cache miss) or it doesn't match,
                 // we drop it. We don't assume validity for unknown signatures.
                 if (signatureFamily !== targetFamily) {
+                    strippedCount++;
                     return false;
                 }
             }
@@ -451,6 +457,12 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
         }
         return msg;
     });
+
+    if (strippedCount > 0) {
+        logger.debug(`[ThinkingUtils] Stripped ${strippedCount} invalid/incompatible thinking block(s)`);
+    }
+
+    return result;
 }
 
 /**

From 141558dd624805642fa247553e9f8d340beece9e Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sun, 4 Jan 2026 00:11:14 +0530
Subject: [PATCH 09/10] Improve cross-model thinking handling and add
 gemini-3-flash fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add gemini-3-flash to MODEL_FALLBACK_MAP for completeness
- Add hasGeminiHistory() to detect Gemini→Claude cross-model switch
- Trigger recovery for Claude only when Gemini history detected
- Remove unnecessary thinking block filtering for Claude-only conversations
- Add comments explaining '.' placeholder usage
- Remove unused filterUnsignedThinkingFromMessages function

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/constants.js                |  7 ++++---
 src/format/request-converter.js | 22 +++++++++++++++-------
 src/format/thinking-utils.js    | 26 +++++++++++++++++++++-----
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/src/constants.js b/src/constants.js
index 7c4e75a..06f9af7 100644
--- a/src/constants.js
+++ b/src/constants.js
@@ -146,11 +146,12 @@ export const OAUTH_REDIRECT_URI = `http://localhost:${OAUTH_CONFIG.callbackPort}
 
 // Model fallback mapping - maps primary model to fallback when quota exhausted
 export const MODEL_FALLBACK_MAP = {
-    'gemini-3-pro-high': 'claude-sonnet-4-5-thinking',
+    'gemini-3-pro-high': 'claude-opus-4-5-thinking',
     'gemini-3-pro-low': 'claude-sonnet-4-5',
+    'gemini-3-flash': 'claude-sonnet-4-5-thinking',
     'claude-opus-4-5-thinking': 'gemini-3-pro-high',
-    'claude-sonnet-4-5-thinking': 'gemini-3-pro-high',
-    'claude-sonnet-4-5': 'gemini-3-pro-low'
+    'claude-sonnet-4-5-thinking': 'gemini-3-flash',
+    'claude-sonnet-4-5': 'gemini-3-flash'
 };
 
 export default {
diff --git a/src/format/request-converter.js b/src/format/request-converter.js
index 98a378c..3e97406 100644
--- a/src/format/request-converter.js
+++ b/src/format/request-converter.js
@@ -15,6 +15,7 @@ import {
     removeTrailingThinkingBlocks,
     reorderAssistantContent,
     filterUnsignedThinkingBlocks,
+    hasGeminiHistory,
     needsThinkingRecovery,
     closeToolLoopForThinking
 } from './thinking-utils.js';
@@ -77,15 +78,20 @@ export function convertAnthropicToGoogle(anthropicRequest) {
         }
     }
 
-    // Apply thinking recovery for thinking models when needed
-    // - Gemini: needs recovery for tool loops/interrupted tools (stripped thinking)
-    // - Claude: needs recovery ONLY when cross-model (incompatible Gemini signatures will be dropped)
+    // Apply thinking recovery for Gemini thinking models when needed
+    // Gemini needs recovery for tool loops/interrupted tools (stripped thinking)
     let processedMessages = messages;
-    const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
 
-    if (isThinking && targetFamily && needsThinkingRecovery(messages)) {
-        logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`);
-        processedMessages = closeToolLoopForThinking(messages, targetFamily);
+    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
+        logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
+        processedMessages = closeToolLoopForThinking(messages, 'gemini');
+    }
+
+    // For Claude: apply recovery only for cross-model (Gemini→Claude) switch
+    // Detected by checking if history has Gemini-style tool_use with thoughtSignature
+    if (isClaudeModel && isThinking && hasGeminiHistory(messages) && needsThinkingRecovery(messages)) {
+        logger.debug('[RequestConverter] Applying thinking recovery for Claude (cross-model from Gemini)');
+        processedMessages = closeToolLoopForThinking(messages, 'claude');
     }
 
     // Convert messages to contents, then filter unsigned thinking blocks
@@ -108,6 +114,8 @@ export function convertAnthropicToGoogle(anthropicRequest) {
         // SAFETY: Google API requires at least one part per content message
         // This happens when all thinking blocks are filtered out (unsigned)
         if (parts.length === 0) {
+            // Use '.' instead of '' because claude models reject empty text parts.
+            // A single period is invisible in practice but satisfies the API requirement.
             logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
             parts.push({ text: '.' });
         }
diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js
index 06bca85..14ce530 100644
--- a/src/format/thinking-utils.js
+++ b/src/format/thinking-utils.js
@@ -27,6 +27,21 @@ export function hasValidSignature(part) {
     return typeof signature === 'string' && signature.length >= MIN_SIGNATURE_LENGTH;
 }
 
+/**
+ * Check if conversation history contains Gemini-style messages.
+ * Gemini puts thoughtSignature on tool_use blocks, Claude puts signature on thinking blocks.
+ * @param {Array<Object>} messages - Array of messages
+ * @returns {boolean} True if any tool_use has thoughtSignature (Gemini pattern)
+ */
+export function hasGeminiHistory(messages) {
+    return messages.some(msg =>
+        Array.isArray(msg.content) &&
+        msg.content.some(block =>
+            block.type === 'tool_use' && block.thoughtSignature !== undefined
+        )
+    );
+}
+
 /**
  * Sanitize a thinking part by keeping only allowed fields
  */
@@ -434,14 +449,14 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
                 return false;
             }
 
-            // Check family compatibility if targetFamily is provided
-            if (targetFamily) {
+            // Check family compatibility only for Gemini targets
+            // Claude can validate its own signatures, so we don't drop for Claude
+            if (targetFamily === 'gemini') {
                 const signature = block.thought === true ? block.thoughtSignature : block.signature;
                 const signatureFamily = getCachedSignatureFamily(signature);
 
-                // Strict validation: If we don't know the family (cache miss) or it doesn't match,
-                // we drop it. We don't assume validity for unknown signatures.
-                if (signatureFamily !== targetFamily) {
+                // For Gemini: drop unknown or mismatched signatures
+                if (!signatureFamily || signatureFamily !== targetFamily) {
                     strippedCount++;
                     return false;
                 }
@@ -450,6 +465,7 @@ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
             return true;
         });
 
+        // Use '.' instead of '' because claude models reject empty text parts
         if (msg.content) {
             return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
         } else if (msg.parts) {

From e0b3f9077703fc9cf36f9d11b989cfadb10f9d3f Mon Sep 17 00:00:00 2001
From: Badri Narayanan S <s.badrinarayanan791@gmail.com>
Date: Sun, 4 Jan 2026 00:19:35 +0530
Subject: [PATCH 10/10] docs: update CLAUDE.md with model fallback and
 cross-model features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add --fallback and --debug startup flags to commands section
- Add test:crossmodel script for cross-model thinking tests
- Document fallback-config.js module in directory structure
- Add Model Fallback section explaining fallback behavior
- Add Cross-Model Thinking Signatures section explaining compatibility
- Update constants section with MODEL_FALLBACK_MAP

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 CLAUDE.md | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 443c3ff..70c7fb2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -17,6 +17,12 @@ npm install
 # Start server (runs on port 8080)
 npm start
 
+# Start with model fallback enabled (falls back to alternate model when quota exhausted)
+npm start -- --fallback
+
+# Start with debug logging
+npm start -- --debug
+
 # Start with file watching for development
 npm run dev
 
@@ -36,6 +42,7 @@ npm run test:streaming     # Streaming SSE events
 npm run test:interleaved   # Interleaved thinking
 npm run test:images        # Image processing
 npm run test:caching       # Prompt caching
+npm run test:crossmodel    # Cross-model thinking signatures
 ```
 
 ## Architecture
@@ -53,6 +60,7 @@ src/
 ├── server.js                   # Express server
 ├── constants.js                # Configuration values
 ├── errors.js                   # Custom error classes
+├── fallback-config.js          # Model fallback mappings and helpers
 │
 ├── cloudcode/                  # Cloud Code API client
 │   ├── index.js                # Public API exports
@@ -87,7 +95,7 @@ src/
 │   ├── content-converter.js    # Message content conversion
 │   ├── schema-sanitizer.js     # JSON Schema cleaning for Gemini
 │   ├── thinking-utils.js       # Thinking block validation/recovery
-│   └── signature-cache.js      # In-memory signature cache
+│   └── signature-cache.js      # Signature cache (tool_use + thinking signatures)
 │
 └── utils/                      # Utilities
     ├── helpers.js              # formatDuration, sleep
@@ -101,7 +109,8 @@ src/
 - **src/account-manager/**: Multi-account pool with sticky selection, rate limit handling, and automatic cooldown
 - **src/auth/**: Authentication including Google OAuth, token extraction, and database access
 - **src/format/**: Format conversion between Anthropic and Google Generative AI formats
-- **src/constants.js**: API endpoints, model mappings, OAuth config, and all configuration values
+- **src/constants.js**: API endpoints, model mappings, fallback config, OAuth config, and all configuration values
+- **src/fallback-config.js**: Model fallback mappings (`getFallbackModel()`, `hasFallback()`)
 - **src/errors.js**: Custom error classes (`RateLimitError`, `AuthError`, `ApiError`, etc.)
 
 **Multi-Account Load Balancing:**
@@ -117,6 +126,22 @@ src/
 - `cache_read_input_tokens` returned in usage metadata when cache hits
 - Token calculation: `input_tokens = promptTokenCount - cachedContentTokenCount`
 
+**Model Fallback (--fallback flag):**
+- When all accounts are exhausted for a model, automatically falls back to an alternate model
+- Fallback mappings defined in `MODEL_FALLBACK_MAP` in `src/constants.js`
+- Thinking models fall back to thinking models (e.g., `claude-sonnet-4-5-thinking` → `gemini-3-flash`)
+- Fallback is disabled on recursive calls to prevent infinite chains
+- Enable with `npm start -- --fallback` or `FALLBACK=true` environment variable
+
+**Cross-Model Thinking Signatures:**
+- Claude and Gemini use incompatible thinking signatures
+- When switching models mid-conversation, incompatible signatures are detected and dropped
+- Signature cache tracks model family ('claude' or 'gemini') for each signature
+- `hasGeminiHistory()` detects Gemini→Claude cross-model scenarios
+- Thinking recovery (`closeToolLoopForThinking()`) injects synthetic messages to close interrupted tool loops
+- For Gemini targets: strict validation - drops unknown or mismatched signatures
+- For Claude targets: lenient - lets Claude validate its own signatures
+
 ## Testing Notes
 
 - Tests require the server to be running (`npm start` in separate terminal)
@@ -129,6 +154,7 @@ src/
 **Constants:** All configuration values are centralized in `src/constants.js`:
 - API endpoints and headers
 - Model mappings and model family detection (`getModelFamily()`, `isThinkingModel()`)
+- Model fallback mappings (`MODEL_FALLBACK_MAP`)
 - OAuth configuration
 - Rate limit thresholds
 - Thinking model settings