When Claude Code strips thinking signatures it doesn't recognize, the proxy would drop unsigned thinking blocks, causing the error "Expected thinking but found text". This fix detects unsigned thinking blocks and triggers recovery to close the tool loop. Co-Authored-By: Claude <noreply@anthropic.com>
641 lines
23 KiB
JavaScript
641 lines
23 KiB
JavaScript
/**
|
|
* Cross-Model Thinking Signature Test
|
|
*
|
|
* Tests that switching between Claude and Gemini models mid-conversation
|
|
* properly handles incompatible thinking signatures.
|
|
*
|
|
* Scenarios tested:
|
|
* 1. Claude → Gemini: Claude thinking signatures should be dropped
|
|
* 2. Gemini → Claude: Gemini thinking signatures should be dropped
|
|
* 3. Both should still work without errors (thinking recovery kicks in)
|
|
*/
|
|
const { streamRequest, analyzeContent, commonTools } = require('./helpers/http-client.cjs');
|
|
const { getModelConfig, getModels } = require('./helpers/test-models.cjs');
|
|
|
|
const tools = [commonTools.executeCommand];
|
|
|
|
async function testClaudeToGemini(CLAUDE_MODEL, GEMINI_MODEL) {
|
|
console.log('='.repeat(60));
|
|
console.log('TEST: Claude → Gemini Cross-Model Switch');
|
|
console.log('Simulates starting with Claude, then switching to Gemini');
|
|
console.log('='.repeat(60));
|
|
console.log('');
|
|
|
|
const claudeConfig = getModelConfig('claude');
|
|
const geminiConfig = getModelConfig('gemini');
|
|
|
|
// TURN 1: Get response from Claude with thinking + tool use
|
|
console.log('TURN 1: Request to Claude (get thinking signature)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn1Messages = [
|
|
{ role: 'user', content: 'Run the command "ls -la" to list files.' }
|
|
];
|
|
|
|
const turn1Result = await streamRequest({
|
|
model: CLAUDE_MODEL,
|
|
max_tokens: claudeConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: claudeConfig.thinking,
|
|
messages: turn1Messages
|
|
});
|
|
|
|
const turn1Content = analyzeContent(turn1Result.content);
|
|
console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
if (!turn1Content.hasToolUse) {
|
|
console.log(' SKIP: No tool use in turn 1');
|
|
return { passed: false, skipped: true };
|
|
}
|
|
|
|
// Extract thinking and tool_use for the assistant message
|
|
const assistantContent = [];
|
|
if (turn1Content.hasThinking && turn1Content.thinking[0]) {
|
|
assistantContent.push({
|
|
type: 'thinking',
|
|
thinking: turn1Content.thinking[0].thinking,
|
|
signature: turn1Content.thinking[0].signature || ''
|
|
});
|
|
}
|
|
if (turn1Content.hasText && turn1Content.text[0]) {
|
|
assistantContent.push({
|
|
type: 'text',
|
|
text: turn1Content.text[0].text
|
|
});
|
|
}
|
|
for (const tool of turn1Content.toolUse) {
|
|
assistantContent.push({
|
|
type: 'tool_use',
|
|
id: tool.id,
|
|
name: tool.name,
|
|
input: tool.input
|
|
});
|
|
}
|
|
|
|
const signatureLength = turn1Content.thinking[0]?.signature?.length || 0;
|
|
console.log(` Claude signature length: ${signatureLength}`);
|
|
|
|
// TURN 2: Switch to Gemini with Claude's thinking signature in history
|
|
console.log('\nTURN 2: Request to Gemini (with Claude thinking in history)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn2Messages = [
|
|
{ role: 'user', content: 'Run the command "ls -la" to list files.' },
|
|
{ role: 'assistant', content: assistantContent },
|
|
{
|
|
role: 'user',
|
|
content: [{
|
|
type: 'tool_result',
|
|
tool_use_id: turn1Content.toolUse[0].id,
|
|
content: 'total 16\ndrwxr-xr-x 5 user staff 160 Jan 1 12:00 .\ndrwxr-xr-x 3 user staff 96 Jan 1 12:00 ..\n-rw-r--r-- 1 user staff 100 Jan 1 12:00 file.txt'
|
|
}]
|
|
}
|
|
];
|
|
|
|
try {
|
|
const turn2Result = await streamRequest({
|
|
model: GEMINI_MODEL,
|
|
max_tokens: geminiConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: geminiConfig.thinking,
|
|
messages: turn2Messages
|
|
});
|
|
|
|
const turn2Content = analyzeContent(turn2Result.content);
|
|
console.log(` Response received: YES`);
|
|
console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
|
|
console.log(` Error: NO`);
|
|
|
|
// Success if we got any response without error
|
|
const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
|
|
console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
|
|
return { passed };
|
|
} catch (error) {
|
|
console.log(` Error: ${error.message}`);
|
|
console.log(` Result: FAIL`);
|
|
return { passed: false, error: error.message };
|
|
}
|
|
}
|
|
|
|
async function testGeminiToClaude(CLAUDE_MODEL, GEMINI_MODEL) {
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log('TEST: Gemini → Claude Cross-Model Switch');
|
|
console.log('Simulates starting with Gemini, then switching to Claude');
|
|
console.log('='.repeat(60));
|
|
console.log('');
|
|
|
|
const claudeConfig = getModelConfig('claude');
|
|
const geminiConfig = getModelConfig('gemini');
|
|
|
|
// TURN 1: Get response from Gemini with thinking + tool use
|
|
console.log('TURN 1: Request to Gemini (get thinking signature)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn1Messages = [
|
|
{ role: 'user', content: 'Run the command "pwd" to show current directory.' }
|
|
];
|
|
|
|
const turn1Result = await streamRequest({
|
|
model: GEMINI_MODEL,
|
|
max_tokens: geminiConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: geminiConfig.thinking,
|
|
messages: turn1Messages
|
|
});
|
|
|
|
const turn1Content = analyzeContent(turn1Result.content);
|
|
console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
if (!turn1Content.hasToolUse) {
|
|
console.log(' SKIP: No tool use in turn 1');
|
|
return { passed: false, skipped: true };
|
|
}
|
|
|
|
// Extract content for the assistant message
|
|
const assistantContent = [];
|
|
if (turn1Content.hasThinking && turn1Content.thinking[0]) {
|
|
assistantContent.push({
|
|
type: 'thinking',
|
|
thinking: turn1Content.thinking[0].thinking,
|
|
signature: turn1Content.thinking[0].signature || ''
|
|
});
|
|
}
|
|
if (turn1Content.hasText && turn1Content.text[0]) {
|
|
assistantContent.push({
|
|
type: 'text',
|
|
text: turn1Content.text[0].text
|
|
});
|
|
}
|
|
for (const tool of turn1Content.toolUse) {
|
|
const toolBlock = {
|
|
type: 'tool_use',
|
|
id: tool.id,
|
|
name: tool.name,
|
|
input: tool.input
|
|
};
|
|
// Include thoughtSignature if present (Gemini puts it on tool_use)
|
|
if (tool.thoughtSignature) {
|
|
toolBlock.thoughtSignature = tool.thoughtSignature;
|
|
}
|
|
assistantContent.push(toolBlock);
|
|
}
|
|
|
|
const thinkingSigLength = turn1Content.thinking[0]?.signature?.length || 0;
|
|
const toolUseSigLength = turn1Content.toolUse[0]?.thoughtSignature?.length || 0;
|
|
console.log(` Gemini thinking signature length: ${thinkingSigLength}`);
|
|
console.log(` Gemini tool_use signature length: ${toolUseSigLength}`);
|
|
|
|
// TURN 2: Switch to Claude with Gemini's thinking signature in history
|
|
console.log('\nTURN 2: Request to Claude (with Gemini thinking in history)');
|
|
console.log('-'.repeat(40));
|
|
console.log(` Assistant content being sent: ${JSON.stringify(assistantContent).substring(0, 400)}`);
|
|
|
|
const turn2Messages = [
|
|
{ role: 'user', content: 'Run the command "pwd" to show current directory.' },
|
|
{ role: 'assistant', content: assistantContent },
|
|
{
|
|
role: 'user',
|
|
content: [{
|
|
type: 'tool_result',
|
|
tool_use_id: turn1Content.toolUse[0].id,
|
|
content: '/home/user/projects'
|
|
}]
|
|
}
|
|
];
|
|
|
|
try {
|
|
const turn2Result = await streamRequest({
|
|
model: CLAUDE_MODEL,
|
|
max_tokens: claudeConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: claudeConfig.thinking,
|
|
messages: turn2Messages
|
|
});
|
|
|
|
const turn2Content = analyzeContent(turn2Result.content);
|
|
console.log(` Response received: YES`);
|
|
console.log(` Stop reason: ${turn2Result.stop_reason}`);
|
|
console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn2Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
console.log(` Raw content: ${JSON.stringify(turn2Result.content).substring(0, 300)}`);
|
|
console.log(` Error: NO`);
|
|
|
|
// Success if we got any response without error
|
|
const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
|
|
console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
|
|
return { passed };
|
|
} catch (error) {
|
|
console.log(` Error: ${error.message}`);
|
|
console.log(` Result: FAIL`);
|
|
return { passed: false, error: error.message };
|
|
}
|
|
}
|
|
|
|
async function testGeminiToClaudeColdCache(CLAUDE_MODEL, GEMINI_MODEL) {
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log('TEST: Gemini → Claude Cross-Model Switch (COLD CACHE)');
|
|
console.log('Simulates: thinking block with NO signature (stripped by Claude Code)');
|
|
console.log('Expected error without fix: "Expected thinking but found text"');
|
|
console.log('='.repeat(60));
|
|
console.log('');
|
|
|
|
const claudeConfig = getModelConfig('claude');
|
|
const geminiConfig = getModelConfig('gemini');
|
|
|
|
// TURN 1: Get response from Gemini with tool use
|
|
console.log('TURN 1: Request to Gemini (get tool_use)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn1Messages = [
|
|
{ role: 'user', content: 'Run the command "whoami" to show current user.' }
|
|
];
|
|
|
|
const turn1Result = await streamRequest({
|
|
model: GEMINI_MODEL,
|
|
max_tokens: geminiConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: geminiConfig.thinking,
|
|
messages: turn1Messages
|
|
});
|
|
|
|
const turn1Content = analyzeContent(turn1Result.content);
|
|
console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
if (!turn1Content.hasToolUse) {
|
|
console.log(' SKIP: No tool use in turn 1');
|
|
return { passed: false, skipped: true };
|
|
}
|
|
|
|
// Build assistant content simulating what Claude Code sends back
|
|
// CRITICAL: No signature on thinking block - simulates Claude Code stripping it
|
|
const assistantContent = [];
|
|
|
|
// Add thinking block WITHOUT signature - this is what causes the issue
|
|
// Claude Code strips signatures it doesn't understand
|
|
assistantContent.push({
|
|
type: 'thinking',
|
|
thinking: turn1Content.hasThinking && turn1Content.thinking[0]
|
|
? turn1Content.thinking[0].thinking
|
|
: 'I need to run the whoami command.'
|
|
// NO signature field - simulating Claude Code stripping it
|
|
});
|
|
|
|
// Add text block
|
|
assistantContent.push({
|
|
type: 'text',
|
|
text: turn1Content.hasText && turn1Content.text[0]
|
|
? turn1Content.text[0].text
|
|
: 'I will run the whoami command for you.'
|
|
});
|
|
|
|
// Add tool_use blocks (also without thoughtSignature)
|
|
for (const tool of turn1Content.toolUse) {
|
|
assistantContent.push({
|
|
type: 'tool_use',
|
|
id: tool.id,
|
|
name: tool.name,
|
|
input: tool.input
|
|
// NO thoughtSignature - Claude Code strips this too
|
|
});
|
|
}
|
|
|
|
console.log(` Built assistant content with UNSIGNED thinking block`);
|
|
|
|
// TURN 2: Switch to Claude with unsigned thinking in history
|
|
console.log('\nTURN 2: Request to Claude (with UNSIGNED thinking block)');
|
|
console.log('-'.repeat(40));
|
|
console.log(` Assistant content: ${JSON.stringify(assistantContent).substring(0, 300)}...`);
|
|
|
|
const turn2Messages = [
|
|
{ role: 'user', content: 'Run the command "whoami" to show current user.' },
|
|
{ role: 'assistant', content: assistantContent },
|
|
{
|
|
role: 'user',
|
|
content: [{
|
|
type: 'tool_result',
|
|
tool_use_id: turn1Content.toolUse[0].id,
|
|
content: 'testuser'
|
|
}]
|
|
}
|
|
];
|
|
|
|
try {
|
|
const turn2Result = await streamRequest({
|
|
model: CLAUDE_MODEL,
|
|
max_tokens: claudeConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: claudeConfig.thinking,
|
|
messages: turn2Messages
|
|
});
|
|
|
|
const turn2Content = analyzeContent(turn2Result.content);
|
|
console.log(` Response received: YES`);
|
|
console.log(` Stop reason: ${turn2Result.stop_reason}`);
|
|
console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn2Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
// Success if we got any response without error
|
|
const passed = turn2Content.hasText || turn2Content.hasThinking || turn2Content.hasToolUse;
|
|
console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
|
|
return { passed };
|
|
} catch (error) {
|
|
// Check for the specific error from issue #120
|
|
const isExpectedError = error.message.includes('Expected') &&
|
|
error.message.includes('thinking') &&
|
|
error.message.includes('found');
|
|
console.log(` Error: ${error.message.substring(0, 200)}`);
|
|
console.log(` Is issue #120 error: ${isExpectedError ? 'YES' : 'NO'}`);
|
|
console.log(` Result: FAIL`);
|
|
return { passed: false, error: error.message, isIssue120Error: isExpectedError };
|
|
}
|
|
}
|
|
|
|
async function testSameModelContinuation(CLAUDE_MODEL) {
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log('TEST: Same Model Continuation - Claude (Control Test)');
|
|
console.log('Verifies same-model multi-turn still works');
|
|
console.log('='.repeat(60));
|
|
console.log('');
|
|
|
|
const claudeConfig = getModelConfig('claude');
|
|
|
|
// TURN 1: Get response from Claude
|
|
console.log('TURN 1: Request to Claude');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn1Messages = [
|
|
{ role: 'user', content: 'Run "echo hello" command.' }
|
|
];
|
|
|
|
const turn1Result = await streamRequest({
|
|
model: CLAUDE_MODEL,
|
|
max_tokens: claudeConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: claudeConfig.thinking,
|
|
messages: turn1Messages
|
|
});
|
|
|
|
const turn1Content = analyzeContent(turn1Result.content);
|
|
console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
if (!turn1Content.hasToolUse) {
|
|
console.log(' SKIP: No tool use in turn 1');
|
|
return { passed: false, skipped: true };
|
|
}
|
|
|
|
// Build assistant message
|
|
const assistantContent = [];
|
|
if (turn1Content.hasThinking && turn1Content.thinking[0]) {
|
|
assistantContent.push({
|
|
type: 'thinking',
|
|
thinking: turn1Content.thinking[0].thinking,
|
|
signature: turn1Content.thinking[0].signature || ''
|
|
});
|
|
}
|
|
if (turn1Content.hasText && turn1Content.text[0]) {
|
|
assistantContent.push({
|
|
type: 'text',
|
|
text: turn1Content.text[0].text
|
|
});
|
|
}
|
|
for (const tool of turn1Content.toolUse) {
|
|
assistantContent.push({
|
|
type: 'tool_use',
|
|
id: tool.id,
|
|
name: tool.name,
|
|
input: tool.input
|
|
});
|
|
}
|
|
|
|
// TURN 2: Continue with same model
|
|
console.log('\nTURN 2: Continue with Claude (same model)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn2Messages = [
|
|
{ role: 'user', content: 'Run "echo hello" command.' },
|
|
{ role: 'assistant', content: assistantContent },
|
|
{
|
|
role: 'user',
|
|
content: [{
|
|
type: 'tool_result',
|
|
tool_use_id: turn1Content.toolUse[0].id,
|
|
content: 'hello'
|
|
}]
|
|
}
|
|
];
|
|
|
|
try {
|
|
const turn2Result = await streamRequest({
|
|
model: CLAUDE_MODEL,
|
|
max_tokens: claudeConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: claudeConfig.thinking,
|
|
messages: turn2Messages
|
|
});
|
|
|
|
const turn2Content = analyzeContent(turn2Result.content);
|
|
console.log(` Response received: YES`);
|
|
console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
|
|
console.log(` Error: NO`);
|
|
|
|
// For same model, we should preserve thinking with valid signature
|
|
const passed = turn2Content.hasText || turn2Content.hasThinking;
|
|
console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
|
|
return { passed };
|
|
} catch (error) {
|
|
console.log(` Error: ${error.message}`);
|
|
console.log(` Result: FAIL`);
|
|
return { passed: false, error: error.message };
|
|
}
|
|
}
|
|
|
|
async function testSameModelContinuationGemini(GEMINI_MODEL) {
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log('TEST: Same Model Continuation - Gemini (Control Test)');
|
|
console.log('Verifies same-model multi-turn still works for Gemini');
|
|
console.log('='.repeat(60));
|
|
console.log('');
|
|
|
|
const geminiConfig = getModelConfig('gemini');
|
|
|
|
// TURN 1: Get response from Gemini
|
|
console.log('TURN 1: Request to Gemini');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn1Messages = [
|
|
{ role: 'user', content: 'Run "echo world" command.' }
|
|
];
|
|
|
|
const turn1Result = await streamRequest({
|
|
model: GEMINI_MODEL,
|
|
max_tokens: geminiConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: geminiConfig.thinking,
|
|
messages: turn1Messages
|
|
});
|
|
|
|
const turn1Content = analyzeContent(turn1Result.content);
|
|
console.log(` Thinking: ${turn1Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn1Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Tool Use: ${turn1Content.hasToolUse ? 'YES' : 'NO'}`);
|
|
|
|
if (!turn1Content.hasToolUse) {
|
|
console.log(' SKIP: No tool use in turn 1');
|
|
return { passed: false, skipped: true };
|
|
}
|
|
|
|
// Build assistant message
|
|
const assistantContent = [];
|
|
if (turn1Content.hasThinking && turn1Content.thinking[0]) {
|
|
assistantContent.push({
|
|
type: 'thinking',
|
|
thinking: turn1Content.thinking[0].thinking,
|
|
signature: turn1Content.thinking[0].signature || ''
|
|
});
|
|
}
|
|
if (turn1Content.hasText && turn1Content.text[0]) {
|
|
assistantContent.push({
|
|
type: 'text',
|
|
text: turn1Content.text[0].text
|
|
});
|
|
}
|
|
for (const tool of turn1Content.toolUse) {
|
|
const toolBlock = {
|
|
type: 'tool_use',
|
|
id: tool.id,
|
|
name: tool.name,
|
|
input: tool.input
|
|
};
|
|
// Include thoughtSignature if present (Gemini puts it on tool_use)
|
|
if (tool.thoughtSignature) {
|
|
toolBlock.thoughtSignature = tool.thoughtSignature;
|
|
}
|
|
assistantContent.push(toolBlock);
|
|
}
|
|
|
|
// TURN 2: Continue with same model
|
|
console.log('\nTURN 2: Continue with Gemini (same model)');
|
|
console.log('-'.repeat(40));
|
|
|
|
const turn2Messages = [
|
|
{ role: 'user', content: 'Run "echo world" command.' },
|
|
{ role: 'assistant', content: assistantContent },
|
|
{
|
|
role: 'user',
|
|
content: [{
|
|
type: 'tool_result',
|
|
tool_use_id: turn1Content.toolUse[0].id,
|
|
content: 'world'
|
|
}]
|
|
}
|
|
];
|
|
|
|
try {
|
|
const turn2Result = await streamRequest({
|
|
model: GEMINI_MODEL,
|
|
max_tokens: geminiConfig.max_tokens,
|
|
stream: true,
|
|
tools,
|
|
thinking: geminiConfig.thinking,
|
|
messages: turn2Messages
|
|
});
|
|
|
|
const turn2Content = analyzeContent(turn2Result.content);
|
|
console.log(` Response received: YES`);
|
|
console.log(` Thinking: ${turn2Content.hasThinking ? 'YES' : 'NO'}`);
|
|
console.log(` Signature: ${turn2Content.hasSignature ? 'YES' : 'NO'}`);
|
|
console.log(` Text: ${turn2Content.hasText ? 'YES' : 'NO'}`);
|
|
console.log(` Error: NO`);
|
|
|
|
// For same model, we should get a response
|
|
const passed = turn2Content.hasText || turn2Content.hasThinking;
|
|
console.log(` Result: ${passed ? 'PASS' : 'FAIL'}`);
|
|
return { passed };
|
|
} catch (error) {
|
|
console.log(` Error: ${error.message}`);
|
|
console.log(` Result: FAIL`);
|
|
return { passed: false, error: error.message };
|
|
}
|
|
}
|
|
|
|
async function main() {
|
|
// Load models once from constants
|
|
const TEST_MODELS = await getModels();
|
|
const CLAUDE_MODEL = TEST_MODELS.claude;
|
|
const GEMINI_MODEL = TEST_MODELS.gemini;
|
|
|
|
console.log('\n');
|
|
console.log('╔' + '═'.repeat(58) + '╗');
|
|
console.log('║' + ' CROSS-MODEL THINKING SIGNATURE TEST SUITE '.padEnd(58) + '║');
|
|
console.log('║' + ' Tests switching between Claude and Gemini '.padEnd(58) + '║');
|
|
console.log('╚' + '═'.repeat(58) + '╝');
|
|
console.log('\n');
|
|
|
|
const results = [];
|
|
|
|
// Test 1: Claude → Gemini
|
|
const claudeToGemini = await testClaudeToGemini(CLAUDE_MODEL, GEMINI_MODEL);
|
|
results.push({ name: 'Claude → Gemini', ...claudeToGemini });
|
|
|
|
// Test 2: Gemini → Claude
|
|
const geminiToClaude = await testGeminiToClaude(CLAUDE_MODEL, GEMINI_MODEL);
|
|
results.push({ name: 'Gemini → Claude', ...geminiToClaude });
|
|
|
|
// Test 3: Gemini → Claude with COLD CACHE (simulates cache expiry)
|
|
const geminiToClaudeCold = await testGeminiToClaudeColdCache(CLAUDE_MODEL, GEMINI_MODEL);
|
|
results.push({ name: 'Gemini → Claude (Cold Cache)', ...geminiToClaudeCold });
|
|
|
|
// Test 4: Same model Claude (control)
|
|
const sameModelClaude = await testSameModelContinuation(CLAUDE_MODEL);
|
|
results.push({ name: 'Same Model (Claude → Claude)', ...sameModelClaude });
|
|
|
|
// Test 5: Same model Gemini (control)
|
|
const sameModelGemini = await testSameModelContinuationGemini(GEMINI_MODEL);
|
|
results.push({ name: 'Same Model (Gemini → Gemini)', ...sameModelGemini });
|
|
|
|
// Summary
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log('SUMMARY');
|
|
console.log('='.repeat(60));
|
|
|
|
let allPassed = true;
|
|
for (const result of results) {
|
|
const status = result.skipped ? 'SKIP' : (result.passed ? 'PASS' : 'FAIL');
|
|
console.log(` [${status}] ${result.name}`);
|
|
if (!result.passed && !result.skipped) allPassed = false;
|
|
}
|
|
|
|
console.log('\n' + '='.repeat(60));
|
|
console.log(`FINAL RESULT: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
|
|
console.log('='.repeat(60));
|
|
|
|
process.exit(allPassed ? 0 : 1);
|
|
}
|
|
|
|
main().catch(err => {
|
|
console.error('Test error:', err);
|
|
process.exit(1);
|
|
});
|