Signature handling for fallback
This commit is contained in:
@@ -25,7 +25,8 @@
|
||||
"test:streaming": "node tests/test-multiturn-thinking-tools-streaming.cjs",
|
||||
"test:interleaved": "node tests/test-interleaved-thinking.cjs",
|
||||
"test:images": "node tests/test-images.cjs",
|
||||
"test:caching": "node tests/test-caching-streaming.cjs"
|
||||
"test:caching": "node tests/test-caching-streaming.cjs",
|
||||
"test:crossmodel": "node tests/test-cross-model-thinking.cjs"
|
||||
},
|
||||
"keywords": [
|
||||
"claude",
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import { MIN_SIGNATURE_LENGTH } from '../constants.js';
|
||||
import { cacheSignature } from '../format/signature-cache.js';
|
||||
import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
|
||||
import { cacheSignature, cacheThinkingSignature } from '../format/signature-cache.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
@@ -110,6 +110,9 @@ export async function* streamSSEResponse(response, originalModel) {
|
||||
|
||||
if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
|
||||
currentThinkingSignature = signature;
|
||||
// Cache thinking signature with model family for cross-model compatibility
|
||||
const modelFamily = getModelFamily(originalModel);
|
||||
cacheThinkingSignature(signature, modelFamily);
|
||||
}
|
||||
|
||||
yield {
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
*/
|
||||
|
||||
import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
|
||||
import { getCachedSignature } from './signature-cache.js';
|
||||
import { getCachedSignature, getCachedSignatureFamily } from './signature-cache.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
/**
|
||||
@@ -155,16 +155,31 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
|
||||
// Add any images from the tool result as separate parts
|
||||
parts.push(...imageParts);
|
||||
} else if (block.type === 'thinking') {
|
||||
// Handle thinking blocks - only those with valid signatures
|
||||
// Handle thinking blocks with signature compatibility check
|
||||
if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
|
||||
// Convert to Gemini format with signature
|
||||
const signatureFamily = getCachedSignatureFamily(block.signature);
|
||||
const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
|
||||
|
||||
// Drop blocks with incompatible signatures for Gemini (cross-model switch)
|
||||
if (isGeminiModel && signatureFamily && targetFamily && signatureFamily !== targetFamily) {
|
||||
logger.debug(`[ContentConverter] Dropping incompatible ${signatureFamily} thinking for ${targetFamily} model`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Drop blocks with unknown signature origin for Gemini (cold cache - safe default)
|
||||
if (isGeminiModel && !signatureFamily && targetFamily) {
|
||||
logger.debug(`[ContentConverter] Dropping thinking with unknown signature origin`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compatible - convert to Gemini format with signature
|
||||
parts.push({
|
||||
text: block.thinking,
|
||||
thought: true,
|
||||
thoughtSignature: block.signature
|
||||
});
|
||||
}
|
||||
// Unsigned thinking blocks are dropped upstream
|
||||
// Unsigned thinking blocks are dropped (existing behavior)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -77,12 +77,14 @@ export function convertAnthropicToGoogle(anthropicRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
// Apply thinking recovery for Gemini thinking models when needed
|
||||
// This handles corrupted tool loops where thinking blocks are stripped
|
||||
// Claude models handle this differently and don't need this recovery
|
||||
// Apply thinking recovery for thinking models when needed
|
||||
// - Gemini: needs recovery for tool loops/interrupted tools (stripped thinking)
|
||||
// - Claude: needs recovery ONLY when cross-model (incompatible Gemini signatures will be dropped)
|
||||
let processedMessages = messages;
|
||||
if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
|
||||
logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
|
||||
const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
|
||||
|
||||
if (isThinking && targetFamily && needsThinkingRecovery(messages, targetFamily)) {
|
||||
logger.debug(`[RequestConverter] Applying thinking recovery for ${targetFamily}`);
|
||||
processedMessages = closeToolLoopForThinking(messages);
|
||||
}
|
||||
|
||||
@@ -107,7 +109,7 @@ export function convertAnthropicToGoogle(anthropicRequest) {
|
||||
// This happens when all thinking blocks are filtered out (unsigned)
|
||||
if (parts.length === 0) {
|
||||
logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
|
||||
parts.push({ text: '' });
|
||||
parts.push({ text: '.' });
|
||||
}
|
||||
|
||||
const content = {
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import { MIN_SIGNATURE_LENGTH } from '../constants.js';
|
||||
import { cacheSignature } from './signature-cache.js';
|
||||
import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
|
||||
import { cacheSignature, cacheThinkingSignature } from './signature-cache.js';
|
||||
|
||||
/**
|
||||
* Convert Google Generative AI response to Anthropic Messages API format
|
||||
@@ -33,6 +33,12 @@ export function convertGoogleToAnthropic(googleResponse, model) {
|
||||
if (part.thought === true) {
|
||||
const signature = part.thoughtSignature || '';
|
||||
|
||||
// Cache thinking signature with model family for cross-model compatibility
|
||||
if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
|
||||
const modelFamily = getModelFamily(model);
|
||||
cacheThinkingSignature(signature, modelFamily);
|
||||
}
|
||||
|
||||
// Include thinking blocks in the response for Claude Code
|
||||
anthropicContent.push({
|
||||
type: 'thinking',
|
||||
|
||||
@@ -5,11 +5,15 @@
|
||||
* Gemini models require thoughtSignature on tool calls, but Claude Code
|
||||
* strips non-standard fields. This cache stores signatures by tool_use_id
|
||||
* so they can be restored in subsequent requests.
|
||||
*
|
||||
* Also caches thinking block signatures with model family for cross-model
|
||||
* compatibility checking.
|
||||
*/
|
||||
|
||||
import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
|
||||
import { GEMINI_SIGNATURE_CACHE_TTL_MS, MIN_SIGNATURE_LENGTH } from '../constants.js';
|
||||
|
||||
const signatureCache = new Map();
|
||||
// Thinking-block signature -> { modelFamily, timestamp } (timestamp is Date.now() at insert time).
const thinkingSignatureCache = new Map();
|
||||
|
||||
/**
|
||||
* Store a signature for a tool_use_id
|
||||
@@ -54,6 +58,11 @@ export function cleanupCache() {
|
||||
signatureCache.delete(key);
|
||||
}
|
||||
}
|
||||
for (const [key, entry] of thinkingSignatureCache) {
|
||||
if (now - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
|
||||
thinkingSignatureCache.delete(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -63,3 +72,43 @@ export function cleanupCache() {
|
||||
export function getCacheSize() {
    // Counts entries in signatureCache only; thinkingSignatureCache has its own size accessor.
    const { size } = signatureCache;
    return size;
}
|
||||
|
||||
/**
 * Remember which model family produced a thinking-block signature.
 *
 * Missing signatures and signatures shorter than MIN_SIGNATURE_LENGTH are
 * ignored. Otherwise the signature becomes the cache key and the entry is
 * stamped with the current time so it can later be expired.
 *
 * @param {string} signature - The thinking signature to cache
 * @param {string} modelFamily - The model family ('claude' or 'gemini')
 * @returns {void}
 */
export function cacheThinkingSignature(signature, modelFamily) {
    if (!signature) {
        return;
    }

    const tooShort = signature.length < MIN_SIGNATURE_LENGTH;
    if (tooShort) {
        return;
    }

    const entry = { modelFamily, timestamp: Date.now() };
    thinkingSignatureCache.set(signature, entry);
}
|
||||
|
||||
/**
 * Look up which model family a thinking signature was cached under.
 *
 * An entry older than GEMINI_SIGNATURE_CACHE_TTL_MS is evicted on read and
 * treated as a miss.
 *
 * @param {string} signature - The signature to look up
 * @returns {string|null} The cached family ('claude' or 'gemini'), or null
 *   when the signature is empty, unknown, or expired
 */
export function getCachedSignatureFamily(signature) {
    if (!signature) {
        return null;
    }

    const cached = thinkingSignatureCache.get(signature);
    if (!cached) {
        return null;
    }

    const ageMs = Date.now() - cached.timestamp;
    const expired = ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS;
    if (expired) {
        // Evict lazily so stale entries do not linger until the next cleanup pass.
        thinkingSignatureCache.delete(signature);
        return null;
    }

    return cached.modelFamily;
}
|
||||
|
||||
/**
 * Report how many thinking signatures are currently cached (for debugging).
 * Entries past their TTL are still counted until they are evicted.
 *
 * @returns {number} Number of entries in the thinking signature cache
 */
export function getThinkingCacheSize() {
    const { size } = thinkingSignatureCache;
    return size;
}
|
||||
|
||||
@@ -386,16 +386,23 @@ export function analyzeConversationState(messages) {
|
||||
|
||||
/**
|
||||
* Check if conversation needs thinking recovery.
|
||||
* Returns true when:
|
||||
* 1. We're in a tool loop but have no valid thinking blocks, OR
|
||||
* 2. We have an interrupted tool with no valid thinking blocks
|
||||
*
|
||||
* For Gemini: recovery needed when (tool loop OR interrupted tool) AND no valid thinking
|
||||
* For Claude: recovery needed when no valid compatible thinking (cross-model detection)
|
||||
*
|
||||
* @param {Array<Object>} messages - Array of messages
|
||||
* @param {string} targetFamily - Target model family ('claude' or 'gemini')
|
||||
* @returns {boolean} True if thinking recovery is needed
|
||||
*/
|
||||
export function needsThinkingRecovery(messages) {
|
||||
const state = analyzeConversationState(messages);
|
||||
// Need recovery if (tool loop OR interrupted tool) AND no thinking
|
||||
export function needsThinkingRecovery(messages, targetFamily = null) {
    const conversation = analyzeConversationState(messages, targetFamily);

    switch (targetFamily) {
        case 'claude':
            // Claude: the only trigger is a turn without thinking.
            return !conversation.turnHasThinking;

        default: {
            // Gemini and unspecified family: the turn must also be inside a tool loop
            // or have an interrupted tool before a missing thinking block matters.
            const toolFlowBroken = conversation.inToolLoop || conversation.interruptedTool;
            return toolFlowBroken && !conversation.turnHasThinking;
        }
    }
}
|
||||
|
||||
@@ -414,9 +421,9 @@ function stripAllThinkingBlocks(messages) {
|
||||
const filtered = content.filter(block => !isThinkingPart(block));
|
||||
|
||||
if (msg.content) {
|
||||
return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '' }] };
|
||||
return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
|
||||
} else if (msg.parts) {
|
||||
return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '' }] };
|
||||
return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '.' }] };
|
||||
}
|
||||
return msg;
|
||||
});
|
||||
|
||||
511
tests/test-cross-model-thinking.cjs
Normal file
511
tests/test-cross-model-thinking.cjs
Normal file
@@ -0,0 +1,511 @@
|
||||
/**
|
||||
* Cross-Model Thinking Signature Test
|
||||
*
|
||||
* Tests that switching between Claude and Gemini models mid-conversation
|
||||
* properly handles incompatible thinking signatures.
|
||||
*
|
||||
* Scenarios tested:
|
||||
* 1. Claude → Gemini: Claude thinking signatures should be dropped
|
||||
* 2. Gemini → Claude: Gemini thinking signatures should be dropped
|
||||
* 3. Both should still work without errors (thinking recovery kicks in)
|
||||
*/
|
||||
const { streamRequest, nonStreamRequest, analyzeContent, commonTools } = require('./helpers/http-client.cjs');
|
||||
const { getModelConfig } = require('./helpers/test-models.cjs');
|
||||
|
||||
// Tool list passed as `tools` on every request in this suite (only the shared executeCommand tool).
const tools = [commonTools.executeCommand];
|
||||
|
||||
// Test models
|
||||
// Model id sent in the `model` field for Claude-side requests.
const CLAUDE_MODEL = 'claude-sonnet-4-5-thinking';
|
||||
// Model id sent in the `model` field for Gemini-side requests.
const GEMINI_MODEL = 'gemini-3-flash';
|
||||
|
||||
/**
 * Scenario: start a tool-using conversation on Claude, then continue it on Gemini.
 *
 * Turn 1 asks Claude to run a command so that the reply carries a thinking
 * block and a tool_use. Turn 2 replays that assistant message (thinking
 * signature included) together with a tool_result to Gemini.
 *
 * The scenario passes when Gemini returns any text, thinking or tool_use
 * content, and is reported as skipped when Claude did not call the tool.
 * A failure in either turn is caught and returned as a failed result, so one
 * broken request cannot abort the rest of the suite.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testClaudeToGemini() {
    const yesNo = (flag) => (flag ? 'YES' : 'NO');
    const prompt = 'Run the command "ls -la" to list files.';

    console.log('='.repeat(60));
    console.log('TEST: Claude → Gemini Cross-Model Switch');
    console.log('Simulates starting with Claude, then switching to Gemini');
    console.log('='.repeat(60));
    console.log('');

    const claudeConfig = getModelConfig('claude');
    const geminiConfig = getModelConfig('gemini');

    try {
        // TURN 1: Get response from Claude with thinking + tool use
        console.log('TURN 1: Request to Claude (get thinking signature)');
        console.log('-'.repeat(40));

        const turn1Result = await streamRequest({
            model: CLAUDE_MODEL,
            max_tokens: claudeConfig.max_tokens,
            stream: true,
            tools,
            thinking: claudeConfig.thinking,
            messages: [{ role: 'user', content: prompt }]
        });

        const turn1Content = analyzeContent(turn1Result.content);
        console.log(` Thinking: ${yesNo(turn1Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn1Content.hasSignature)}`);
        console.log(` Tool Use: ${yesNo(turn1Content.hasToolUse)}`);

        // Without a tool call there is nothing to answer in turn 2.
        if (!turn1Content.hasToolUse) {
            console.log(' SKIP: No tool use in turn 1');
            return { passed: false, skipped: true };
        }

        // Rebuild the assistant message the client would send back: thinking, text, then tool calls.
        const assistantContent = [];
        const firstThinking = turn1Content.hasThinking ? turn1Content.thinking[0] : null;
        if (firstThinking) {
            assistantContent.push({
                type: 'thinking',
                thinking: firstThinking.thinking,
                signature: firstThinking.signature || ''
            });
        }
        const firstText = turn1Content.hasText ? turn1Content.text[0] : null;
        if (firstText) {
            assistantContent.push({ type: 'text', text: firstText.text });
        }
        for (const { id, name, input } of turn1Content.toolUse) {
            assistantContent.push({ type: 'tool_use', id, name, input });
        }

        const signatureLength = turn1Content.thinking?.[0]?.signature?.length || 0;
        console.log(` Claude signature length: ${signatureLength}`);

        // TURN 2: Switch to Gemini with Claude's thinking signature in history
        console.log('\nTURN 2: Request to Gemini (with Claude thinking in history)');
        console.log('-'.repeat(40));

        const turn2Messages = [
            { role: 'user', content: prompt },
            { role: 'assistant', content: assistantContent },
            {
                role: 'user',
                content: [{
                    type: 'tool_result',
                    tool_use_id: turn1Content.toolUse[0].id,
                    content: 'total 16\ndrwxr-xr-x 5 user staff 160 Jan 1 12:00 .\ndrwxr-xr-x 3 user staff 96 Jan 1 12:00 ..\n-rw-r--r-- 1 user staff 100 Jan 1 12:00 file.txt'
                }]
            }
        ];

        const turn2Result = await streamRequest({
            model: GEMINI_MODEL,
            max_tokens: geminiConfig.max_tokens,
            stream: true,
            tools,
            thinking: geminiConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Thinking: ${yesNo(turn2Content.hasThinking)}`);
        console.log(` Text: ${yesNo(turn2Content.hasText)}`);
        console.log(` Error: NO`);

        // Success if we got any response without error
        const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
|
||||
|
||||
/**
 * Scenario: start a tool-using conversation on Gemini, then continue it on Claude.
 *
 * Turn 1 asks Gemini to run a command. Turn 2 replays the resulting assistant
 * message (thinking signature and any tool_use thoughtSignature included)
 * together with a tool_result to Claude.
 *
 * The scenario passes when Claude returns any text, thinking or tool_use
 * content, and is reported as skipped when Gemini did not call the tool.
 * A failure in either turn is caught and returned as a failed result, so one
 * broken request cannot abort the rest of the suite.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testGeminiToClaude() {
    const yesNo = (flag) => (flag ? 'YES' : 'NO');
    const prompt = 'Run the command "pwd" to show current directory.';

    console.log('\n' + '='.repeat(60));
    console.log('TEST: Gemini → Claude Cross-Model Switch');
    console.log('Simulates starting with Gemini, then switching to Claude');
    console.log('='.repeat(60));
    console.log('');

    const claudeConfig = getModelConfig('claude');
    const geminiConfig = getModelConfig('gemini');

    try {
        // TURN 1: Get response from Gemini with thinking + tool use
        console.log('TURN 1: Request to Gemini (get thinking signature)');
        console.log('-'.repeat(40));

        const turn1Result = await streamRequest({
            model: GEMINI_MODEL,
            max_tokens: geminiConfig.max_tokens,
            stream: true,
            tools,
            thinking: geminiConfig.thinking,
            messages: [{ role: 'user', content: prompt }]
        });

        const turn1Content = analyzeContent(turn1Result.content);
        console.log(` Thinking: ${yesNo(turn1Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn1Content.hasSignature)}`);
        console.log(` Tool Use: ${yesNo(turn1Content.hasToolUse)}`);

        // Without a tool call there is nothing to answer in turn 2.
        if (!turn1Content.hasToolUse) {
            console.log(' SKIP: No tool use in turn 1');
            return { passed: false, skipped: true };
        }

        // Rebuild the assistant message the client would send back: thinking, text, then tool calls.
        const assistantContent = [];
        const firstThinking = turn1Content.hasThinking ? turn1Content.thinking[0] : null;
        if (firstThinking) {
            assistantContent.push({
                type: 'thinking',
                thinking: firstThinking.thinking,
                signature: firstThinking.signature || ''
            });
        }
        const firstText = turn1Content.hasText ? turn1Content.text[0] : null;
        if (firstText) {
            assistantContent.push({ type: 'text', text: firstText.text });
        }
        for (const tool of turn1Content.toolUse) {
            assistantContent.push({
                type: 'tool_use',
                id: tool.id,
                name: tool.name,
                input: tool.input,
                // Include thoughtSignature if present (Gemini puts it on tool_use)
                ...(tool.thoughtSignature ? { thoughtSignature: tool.thoughtSignature } : {})
            });
        }

        const thinkingSigLength = turn1Content.thinking?.[0]?.signature?.length || 0;
        const toolUseSigLength = turn1Content.toolUse[0]?.thoughtSignature?.length || 0;
        console.log(` Gemini thinking signature length: ${thinkingSigLength}`);
        console.log(` Gemini tool_use signature length: ${toolUseSigLength}`);

        // TURN 2: Switch to Claude with Gemini's thinking signature in history
        console.log('\nTURN 2: Request to Claude (with Gemini thinking in history)');
        console.log('-'.repeat(40));
        console.log(` Assistant content being sent: ${JSON.stringify(assistantContent).substring(0, 400)}`);

        const turn2Messages = [
            { role: 'user', content: prompt },
            { role: 'assistant', content: assistantContent },
            {
                role: 'user',
                content: [{
                    type: 'tool_result',
                    tool_use_id: turn1Content.toolUse[0].id,
                    content: '/home/user/projects'
                }]
            }
        ];

        const turn2Result = await streamRequest({
            model: CLAUDE_MODEL,
            max_tokens: claudeConfig.max_tokens,
            stream: true,
            tools,
            thinking: claudeConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Stop reason: ${turn2Result.stop_reason}`);
        console.log(` Thinking: ${yesNo(turn2Content.hasThinking)}`);
        console.log(` Text: ${yesNo(turn2Content.hasText)}`);
        console.log(` Tool Use: ${yesNo(turn2Content.hasToolUse)}`);
        console.log(` Raw content: ${JSON.stringify(turn2Result.content).substring(0, 300)}`);
        console.log(` Error: NO`);

        // Success if we got any response without error
        const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
|
||||
|
||||
/**
 * Control scenario: two consecutive turns on Claude.
 *
 * Turn 1 asks Claude to run a command. Turn 2 replays the assistant message
 * (thinking signature included) together with a tool_result to the same model.
 *
 * The scenario passes when turn 2 returns text or thinking content, and is
 * reported as skipped when turn 1 did not call the tool. A failure in either
 * turn is caught and returned as a failed result, so one broken request
 * cannot abort the rest of the suite.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testSameModelContinuation() {
    const yesNo = (flag) => (flag ? 'YES' : 'NO');
    const prompt = 'Run "echo hello" command.';

    console.log('\n' + '='.repeat(60));
    console.log('TEST: Same Model Continuation - Claude (Control Test)');
    console.log('Verifies same-model multi-turn still works');
    console.log('='.repeat(60));
    console.log('');

    const claudeConfig = getModelConfig('claude');

    try {
        // TURN 1: Get response from Claude
        console.log('TURN 1: Request to Claude');
        console.log('-'.repeat(40));

        const turn1Result = await streamRequest({
            model: CLAUDE_MODEL,
            max_tokens: claudeConfig.max_tokens,
            stream: true,
            tools,
            thinking: claudeConfig.thinking,
            messages: [{ role: 'user', content: prompt }]
        });

        const turn1Content = analyzeContent(turn1Result.content);
        console.log(` Thinking: ${yesNo(turn1Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn1Content.hasSignature)}`);
        console.log(` Tool Use: ${yesNo(turn1Content.hasToolUse)}`);

        // Without a tool call there is nothing to answer in turn 2.
        if (!turn1Content.hasToolUse) {
            console.log(' SKIP: No tool use in turn 1');
            return { passed: false, skipped: true };
        }

        // Rebuild the assistant message the client would send back: thinking, text, then tool calls.
        const assistantContent = [];
        const firstThinking = turn1Content.hasThinking ? turn1Content.thinking[0] : null;
        if (firstThinking) {
            assistantContent.push({
                type: 'thinking',
                thinking: firstThinking.thinking,
                signature: firstThinking.signature || ''
            });
        }
        const firstText = turn1Content.hasText ? turn1Content.text[0] : null;
        if (firstText) {
            assistantContent.push({ type: 'text', text: firstText.text });
        }
        for (const { id, name, input } of turn1Content.toolUse) {
            assistantContent.push({ type: 'tool_use', id, name, input });
        }

        // TURN 2: Continue with same model
        console.log('\nTURN 2: Continue with Claude (same model)');
        console.log('-'.repeat(40));

        const turn2Messages = [
            { role: 'user', content: prompt },
            { role: 'assistant', content: assistantContent },
            {
                role: 'user',
                content: [{
                    type: 'tool_result',
                    tool_use_id: turn1Content.toolUse[0].id,
                    content: 'hello'
                }]
            }
        ];

        const turn2Result = await streamRequest({
            model: CLAUDE_MODEL,
            max_tokens: claudeConfig.max_tokens,
            stream: true,
            tools,
            thinking: claudeConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Thinking: ${yesNo(turn2Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn2Content.hasSignature)}`);
        console.log(` Text: ${yesNo(turn2Content.hasText)}`);
        console.log(` Error: NO`);

        // For same model, we should preserve thinking with valid signature
        const passed = turn2Content.hasText || turn2Content.hasThinking;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
|
||||
|
||||
/**
 * Control scenario: two consecutive turns on Gemini.
 *
 * Turn 1 asks Gemini to run a command. Turn 2 replays the assistant message
 * (thinking signature and any tool_use thoughtSignature included) together
 * with a tool_result to the same model.
 *
 * The scenario passes when turn 2 returns text or thinking content, and is
 * reported as skipped when turn 1 did not call the tool. A failure in either
 * turn is caught and returned as a failed result, so one broken request
 * cannot abort the rest of the suite.
 *
 * @returns {Promise<{passed: boolean, skipped?: boolean, error?: string}>}
 */
async function testSameModelContinuationGemini() {
    const yesNo = (flag) => (flag ? 'YES' : 'NO');
    const prompt = 'Run "echo world" command.';

    console.log('\n' + '='.repeat(60));
    console.log('TEST: Same Model Continuation - Gemini (Control Test)');
    console.log('Verifies same-model multi-turn still works for Gemini');
    console.log('='.repeat(60));
    console.log('');

    const geminiConfig = getModelConfig('gemini');

    try {
        // TURN 1: Get response from Gemini
        console.log('TURN 1: Request to Gemini');
        console.log('-'.repeat(40));

        const turn1Result = await streamRequest({
            model: GEMINI_MODEL,
            max_tokens: geminiConfig.max_tokens,
            stream: true,
            tools,
            thinking: geminiConfig.thinking,
            messages: [{ role: 'user', content: prompt }]
        });

        const turn1Content = analyzeContent(turn1Result.content);
        console.log(` Thinking: ${yesNo(turn1Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn1Content.hasSignature)}`);
        console.log(` Tool Use: ${yesNo(turn1Content.hasToolUse)}`);

        // Without a tool call there is nothing to answer in turn 2.
        if (!turn1Content.hasToolUse) {
            console.log(' SKIP: No tool use in turn 1');
            return { passed: false, skipped: true };
        }

        // Rebuild the assistant message the client would send back: thinking, text, then tool calls.
        const assistantContent = [];
        const firstThinking = turn1Content.hasThinking ? turn1Content.thinking[0] : null;
        if (firstThinking) {
            assistantContent.push({
                type: 'thinking',
                thinking: firstThinking.thinking,
                signature: firstThinking.signature || ''
            });
        }
        const firstText = turn1Content.hasText ? turn1Content.text[0] : null;
        if (firstText) {
            assistantContent.push({ type: 'text', text: firstText.text });
        }
        for (const tool of turn1Content.toolUse) {
            assistantContent.push({
                type: 'tool_use',
                id: tool.id,
                name: tool.name,
                input: tool.input,
                // Include thoughtSignature if present (Gemini puts it on tool_use)
                ...(tool.thoughtSignature ? { thoughtSignature: tool.thoughtSignature } : {})
            });
        }

        // TURN 2: Continue with same model
        console.log('\nTURN 2: Continue with Gemini (same model)');
        console.log('-'.repeat(40));

        const turn2Messages = [
            { role: 'user', content: prompt },
            { role: 'assistant', content: assistantContent },
            {
                role: 'user',
                content: [{
                    type: 'tool_result',
                    tool_use_id: turn1Content.toolUse[0].id,
                    content: 'world'
                }]
            }
        ];

        const turn2Result = await streamRequest({
            model: GEMINI_MODEL,
            max_tokens: geminiConfig.max_tokens,
            stream: true,
            tools,
            thinking: geminiConfig.thinking,
            messages: turn2Messages
        });

        const turn2Content = analyzeContent(turn2Result.content);
        console.log(` Response received: YES`);
        console.log(` Thinking: ${yesNo(turn2Content.hasThinking)}`);
        console.log(` Signature: ${yesNo(turn2Content.hasSignature)}`);
        console.log(` Text: ${yesNo(turn2Content.hasText)}`);
        console.log(` Error: NO`);

        // For same model, we should get a response
        const passed = turn2Content.hasText || turn2Content.hasThinking;
        console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
        return { passed };
    } catch (error) {
        console.log(` Error: ${error.message}`);
        console.log(` Result: FAIL`);
        return { passed: false, error: error.message };
    }
}
|
||||
|
||||
/**
 * Run the four scenarios one after another, print a summary table, and exit
 * the process with status 0 when no scenario failed (skipped scenarios do not
 * count as failures) or 1 otherwise.
 */
async function main() {
    const banner = '═'.repeat(58);
    const rule = '='.repeat(60);

    console.log('\n');
    console.log('╔' + banner + '╗');
    console.log('║' + ' CROSS-MODEL THINKING SIGNATURE TEST SUITE '.padEnd(58) + '║');
    console.log('║' + ' Tests switching between Claude and Gemini '.padEnd(58) + '║');
    console.log('╚' + banner + '╝');
    console.log('\n');

    // Two cross-model scenarios followed by the two same-model controls, run in this order.
    const scenarios = [
        ['Claude → Gemini', testClaudeToGemini],
        ['Gemini → Claude', testGeminiToClaude],
        ['Same Model (Claude → Claude)', testSameModelContinuation],
        ['Same Model (Gemini → Gemini)', testSameModelContinuationGemini]
    ];

    const results = [];
    for (const [name, runScenario] of scenarios) {
        const outcome = await runScenario();
        results.push({ name, ...outcome });
    }

    // Summary
    console.log('\n' + rule);
    console.log('SUMMARY');
    console.log(rule);

    for (const result of results) {
        let status = 'FAIL';
        if (result.skipped) {
            status = 'SKIP';
        } else if (result.passed) {
            status = 'PASS';
        }
        console.log(` [${status}] ${result.name}`);
    }

    // A skipped scenario is neutral; only an explicit failure flips the verdict.
    const allPassed = results.every((result) => result.passed || result.skipped);

    console.log('\n' + rule);
    console.log(`FINAL RESULT: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
    console.log(rule);

    process.exit(allPassed ? 0 : 1);
}
|
||||
|
||||
// Entry point: any rejection escaping main() is logged and turned into a non-zero exit code.
main().catch((failure) => {
    console.error('Test error:', failure);
    process.exit(1);
});
|
||||
Reference in New Issue
Block a user