initial commit

This commit is contained in:
Badri Narayanan S
2025-12-19 19:20:28 +05:30
parent 52d72b7bff
commit 5ae29947b1
18 changed files with 3925 additions and 494 deletions

100
tests/run-all.cjs Normal file
View File

@@ -0,0 +1,100 @@
#!/usr/bin/env node
/**
* Test Runner
*
* Runs all tests in sequence and reports results.
* Usage: node tests/run-all.cjs
*/
const { spawn } = require('child_process');
const path = require('path');
// Ordered list of test scripts the runner executes one after another.
// `name` is the label printed in the banners and the final summary; `file` is
// the script's filename, joined onto this runner's directory when it is spawned.
// Either field can be matched by the optional command-line filter.
const tests = [
{ name: 'Thinking Signatures', file: 'test-thinking-signatures.cjs' },
{ name: 'Multi-turn Tools (Non-Streaming)', file: 'test-multiturn-thinking-tools.cjs' },
{ name: 'Multi-turn Tools (Streaming)', file: 'test-multiturn-thinking-tools-streaming.cjs' },
{ name: 'Interleaved Thinking', file: 'test-interleaved-thinking.cjs' },
{ name: 'Image Support', file: 'test-images.cjs' }
];
/**
 * Run one test script in a child Node.js process and report whether it passed.
 *
 * The child inherits this process's stdio, so its output appears inline. A
 * spawn failure, a non-zero exit code, or termination by a signal all resolve
 * with `passed: false`.
 *
 * @param {{ name: string, file: string }} test - Entry from the `tests` list;
 *   `file` is resolved relative to this script's directory.
 * @returns {Promise<{ name: string, file: string, passed: boolean }>} The
 *   entry's own fields plus a `passed` flag.
 */
function runTest(test) {
  return new Promise((resolve) => {
    const testPath = path.join(__dirname, test.file);
    // Use the interpreter that is running this script rather than whichever
    // `node` is first on PATH (it may be missing or a different version).
    const child = spawn(process.execPath, [testPath], { stdio: 'inherit' });

    child.on('close', (code, signal) => {
      if (signal) {
        // `code` is null when the child was killed; say why the test failed.
        console.error(`${test.name} was terminated by signal ${signal}`);
      }
      resolve({ ...test, passed: code === 0 });
    });

    // Emitted when the process could not be spawned; settling twice is harmless.
    child.on('error', (err) => {
      console.error(`Error running ${test.name}:`, err);
      resolve({ ...test, passed: false });
    });
  });
}
/**
 * Entry point of the test runner.
 *
 * Prints a banner, optionally narrows the suite using the first command-line
 * argument (matched case-sensitively against the file name and
 * case-insensitively against the display name), runs the selected tests
 * sequentially, prints a summary table, and exits with status 0 when every
 * test passed or 1 otherwise. Exits with status 1 when the filter matches
 * nothing.
 *
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  console.log('╔══════════════════════════════════════════════════════════════╗');
  console.log('║ ANTIGRAVITY PROXY TEST SUITE ║');
  console.log('╚══════════════════════════════════════════════════════════════╝');
  console.log('');
  console.log('Make sure the server is running on port 8080 before running tests.');
  console.log('');

  // Check if running specific test
  const specificTest = process.argv[2];
  let testsToRun = tests;
  if (specificTest) {
    testsToRun = tests.filter(t =>
      t.file.includes(specificTest) || t.name.toLowerCase().includes(specificTest.toLowerCase())
    );
    if (testsToRun.length === 0) {
      console.log(`No test found matching: ${specificTest}`);
      console.log('\nAvailable tests:');
      tests.forEach(t => console.log(` - ${t.name} (${t.file})`));
      process.exit(1);
    }
  }

  // Run sequentially so the children's inherited output does not interleave.
  const results = [];
  for (const test of testsToRun) {
    console.log('\n');
    console.log('╔' + '═'.repeat(60) + '╗');
    console.log('║ Running: ' + test.name.padEnd(50) + '║');
    console.log('╚' + '═'.repeat(60) + '╝');
    console.log('');
    const result = await runTest(test);
    results.push(result);
    console.log('\n');
  }

  // Summary
  console.log('╔══════════════════════════════════════════════════════════════╗');
  console.log('║ FINAL RESULTS ║');
  console.log('╠══════════════════════════════════════════════════════════════╣');
  for (const result of results) {
    const status = result.passed ? '✓ PASS' : '✗ FAIL';
    console.log(`${status.padEnd(8)} ${result.name.padEnd(50)}`);
  }
  const allPassed = results.every(result => result.passed);
  console.log('╠══════════════════════════════════════════════════════════════╣');
  const overallStatus = allPassed ? '✓ ALL TESTS PASSED' : '✗ SOME TESTS FAILED';
  console.log(`${overallStatus.padEnd(60)}`);
  console.log('╚══════════════════════════════════════════════════════════════╝');
  process.exit(allPassed ? 0 : 1);
}
// Script entry point: start the runner. Any rejection escaping main() is
// logged and turned into exit status 1.
main().catch(err => {
console.error('Test runner failed:', err);
process.exit(1);
});

240
tests/test-images.cjs Normal file
View File

@@ -0,0 +1,240 @@
/**
* Image Support Test
*
* Tests that images can be sent to the API with thinking models.
* Simulates Claude Code sending screenshots or images for analysis.
*/
const http = require('http');
const fs = require('fs');
const path = require('path');
// Host and port that every request in this file is sent to.
const BASE_URL = 'localhost';
const PORT = 8080;
// Load test image from disk
const TEST_IMAGE_PATH = path.join(__dirname, 'utils', 'test_image.jpeg');
// Read synchronously when the module loads (throws if the file cannot be read)
// and kept as a base64 string for use in image content blocks.
const TEST_IMAGE_BASE64 = fs.readFileSync(TEST_IMAGE_PATH).toString('base64');
/**
 * POST `body` to `/v1/messages` on BASE_URL:PORT and collect the Server-Sent
 * Events (SSE) reply once the response has ended.
 *
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<{content: object[], events: {type: string, data: object}[], statusCode: number, error?: object}>}
 *   `events` holds every frame that had both an `event:` and a JSON `data:`
 *   line; `content` holds the thinking/text blocks rebuilt from the
 *   content_block_start/delta/stop events. `error` is set when the stream
 *   contained an `error` event, or when the server answered with an HTTP
 *   status >= 400 without one. The promise rejects on a request- or
 *   response-level stream error.
 */
function streamRequest(body) {
  // Describe a failed HTTP response that carried no SSE `error` event.
  const describeHttpFailure = (statusCode, rawBody) => {
    try {
      const parsed = JSON.parse(rawBody);
      if (parsed && typeof parsed.error === 'object' && parsed.error !== null && parsed.error.message) {
        return parsed.error;
      }
    } catch (e) {
      // Body is not JSON; fall through to the generic description.
    }
    return { type: 'http_error', message: `HTTP ${statusCode}: ${rawBody.slice(0, 200)}` };
  };

  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const req = http.request({
      host: BASE_URL,
      port: PORT,
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'interleaved-thinking-2025-05-14',
        'Content-Length': Buffer.byteLength(data)
      }
    }, res => {
      // Decode at the stream level: calling toString() on each chunk corrupts
      // any multi-byte UTF-8 character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let fullData = '';
      res.on('data', chunk => {
        fullData += chunk;
      });
      // Without this the promise would never settle if the response stream fails.
      res.on('error', reject);
      res.on('end', () => {
        // Frames are separated by a blank line; accept LF and CRLF line endings.
        const events = [];
        for (const frame of fullData.split(/\r?\n\r?\n/)) {
          if (!frame.trim()) continue;
          const lines = frame.split(/\r?\n/);
          const eventLine = lines.find(l => l.startsWith('event:'));
          const dataLine = lines.find(l => l.startsWith('data:'));
          if (!eventLine || !dataLine) continue;
          try {
            events.push({
              type: eventLine.slice('event:'.length).trim(),
              data: JSON.parse(dataLine.slice('data:'.length).trim())
            });
          } catch (e) {
            // Best effort: skip a frame whose data is not valid JSON, keep the rest.
          }
        }

        // Rebuild content blocks by replaying start/delta/stop events in order.
        const content = [];
        let currentBlock = null;
        for (const event of events) {
          if (event.type === 'content_block_start') {
            currentBlock = { ...event.data?.content_block };
            if (currentBlock.type === 'thinking') {
              currentBlock.thinking = '';
              currentBlock.signature = '';
            }
            if (currentBlock.type === 'text') currentBlock.text = '';
          } else if (event.type === 'content_block_delta') {
            const delta = event.data?.delta;
            if (!currentBlock || !delta) continue;
            if (delta.type === 'thinking_delta') {
              currentBlock.thinking += delta.thinking || '';
            } else if (delta.type === 'signature_delta') {
              currentBlock.signature += delta.signature || '';
            } else if (delta.type === 'text_delta') {
              currentBlock.text += delta.text || '';
            }
          } else if (event.type === 'content_block_stop') {
            if (currentBlock) content.push(currentBlock);
            currentBlock = null;
          }
        }

        const result = { content, events, statusCode: res.statusCode };
        const errorEvent = events.find(e => e.type === 'error');
        if (errorEvent) {
          result.error = errorEvent.data?.error;
        } else if (res.statusCode >= 400) {
          // A plain (non-SSE) error body would otherwise be silently dropped.
          result.error = describeHttpFailure(res.statusCode, fullData);
        }
        resolve(result);
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
/**
 * Exercise image input in two scenarios: a single image with a question, and
 * an image sent as the third message of a conversation. Prints a per-scenario
 * report and a summary, then exits the process with status 0 when every
 * scenario passed and 1 otherwise.
 */
async function runTests() {
  const banner = '='.repeat(60);
  const rule = '-'.repeat(40);
  const results = [];
  let allPassed = true;

  // Store one scenario outcome and fold it into the overall verdict.
  const record = (name, passed) => {
    results.push({ name, passed });
    if (!passed) {
      allPassed = false;
    }
  };
  const ofType = (blocks, type) => blocks.filter(b => b.type === type);
  const yesNo = blocks => (blocks.length > 0 ? 'YES' : 'NO');
  // Fields shared by both requests; spread first so the key order is unchanged.
  const commonFields = {
    model: 'claude-sonnet-4-5-thinking',
    max_tokens: 2048,
    stream: true,
    thinking: { type: 'enabled', budget_tokens: 8000 }
  };
  // Build a base64 JPEG image content block from TEST_IMAGE_BASE64.
  const imageBlock = () => ({
    type: 'image',
    source: {
      type: 'base64',
      media_type: 'image/jpeg',
      data: TEST_IMAGE_BASE64
    }
  });

  console.log(banner);
  console.log('IMAGE SUPPORT TEST');
  console.log('Tests image processing with thinking models');
  console.log(banner);
  console.log('');

  // ----- Scenario 1: single image with a question -----
  console.log('TEST 1: Single image with question');
  console.log(rule);
  const single = await streamRequest({
    ...commonFields,
    messages: [{
      role: 'user',
      content: [
        imageBlock(),
        { type: 'text', text: 'What do you see in this image? Describe it briefly.' }
      ]
    }]
  });
  if (single.error) {
    console.log(` ERROR: ${single.error.message}`);
    record('Single image processing', false);
  } else {
    const thinking = ofType(single.content, 'thinking');
    const text = ofType(single.content, 'text');
    console.log(` Thinking: ${yesNo(thinking)}`);
    console.log(` Text response: ${yesNo(text)}`);
    if (thinking.length > 0) {
      console.log(` Thinking: "${thinking[0].thinking?.substring(0, 60)}..."`);
    }
    if (text.length > 0) {
      console.log(` Response: "${text[0].text?.substring(0, 100)}..."`);
    }
    // Both a thinking block and a text block are required here.
    record('Single image processing', thinking.length > 0 && text.length > 0);
  }

  // ----- Scenario 2: image arrives later in a conversation -----
  console.log('\nTEST 2: Image in multi-turn conversation');
  console.log(rule);
  const multiTurn = await streamRequest({
    ...commonFields,
    messages: [
      { role: 'user', content: 'I will show you an image.' },
      {
        role: 'assistant',
        content: [{
          type: 'text',
          text: 'Sure, please share the image and I\'ll help analyze it.'
        }]
      },
      {
        role: 'user',
        content: [
          imageBlock(),
          { type: 'text', text: 'Here is the image. What do you see?' }
        ]
      }
    ]
  });
  if (multiTurn.error) {
    console.log(` ERROR: ${multiTurn.error.message}`);
    record('Image in multi-turn', false);
  } else {
    const thinking = ofType(multiTurn.content, 'thinking');
    const text = ofType(multiTurn.content, 'text');
    console.log(` Thinking: ${yesNo(thinking)}`);
    console.log(` Text response: ${yesNo(text)}`);
    if (text.length > 0) {
      console.log(` Response: "${text[0].text?.substring(0, 80)}..."`);
    }
    // Only a text reply is required for this scenario.
    record('Image in multi-turn', text.length > 0);
  }

  // ----- Summary -----
  console.log('\n' + banner);
  console.log('SUMMARY');
  console.log(banner);
  results.forEach(({ name, passed }) => {
    const status = passed ? 'PASS' : 'FAIL';
    console.log(` [${status}] ${name}`);
  });
  console.log('\n' + banner);
  console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
  console.log(banner);
  process.exit(allPassed ? 0 : 1);
}
// Script entry point: run the scenarios. Any rejection escaping runTests()
// is logged and turned into exit status 1.
runTests().catch(err => {
console.error('Test failed with error:', err);
process.exit(1);
});

View File

@@ -0,0 +1,285 @@
/**
* Interleaved Thinking Test
*
* Tests that interleaved thinking works correctly:
* - Multiple thinking blocks can appear in a single response
* - Thinking blocks between tool calls
* - Thinking after tool results
*
* This simulates complex Claude Code scenarios where the model
* thinks multiple times during a single turn.
*/
const http = require('http');
// Host and port that every request in this file is sent to.
const BASE_URL = 'localhost';
const PORT = 8080;
/**
 * POST `body` to `/v1/messages` on BASE_URL:PORT and collect the Server-Sent
 * Events (SSE) reply once the response has ended.
 *
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<{content: object[], events: {type: string, data: object}[], statusCode: number, error?: object}>}
 *   `events` holds every frame that had both an `event:` and a JSON `data:`
 *   line; `content` holds the blocks rebuilt from the
 *   content_block_start/delta/stop events, with each tool_use block's streamed
 *   JSON parsed into `input`. `error` is set when the stream contained an
 *   `error` event, or when the server answered with an HTTP status >= 400
 *   without one. The promise rejects on a request- or response-level stream
 *   error.
 */
function streamRequest(body) {
  // Describe a failed HTTP response that carried no SSE `error` event.
  const describeHttpFailure = (statusCode, rawBody) => {
    try {
      const parsed = JSON.parse(rawBody);
      if (parsed && typeof parsed.error === 'object' && parsed.error !== null && parsed.error.message) {
        return parsed.error;
      }
    } catch (e) {
      // Body is not JSON; fall through to the generic description.
    }
    return { type: 'http_error', message: `HTTP ${statusCode}: ${rawBody.slice(0, 200)}` };
  };

  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const req = http.request({
      host: BASE_URL,
      port: PORT,
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'interleaved-thinking-2025-05-14',
        'Content-Length': Buffer.byteLength(data)
      }
    }, res => {
      // Decode at the stream level: calling toString() on each chunk corrupts
      // any multi-byte UTF-8 character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let fullData = '';
      res.on('data', chunk => {
        fullData += chunk;
      });
      // Without this the promise would never settle if the response stream fails.
      res.on('error', reject);
      res.on('end', () => {
        // Frames are separated by a blank line; accept LF and CRLF line endings.
        const events = [];
        for (const frame of fullData.split(/\r?\n\r?\n/)) {
          if (!frame.trim()) continue;
          const lines = frame.split(/\r?\n/);
          const eventLine = lines.find(l => l.startsWith('event:'));
          const dataLine = lines.find(l => l.startsWith('data:'));
          if (!eventLine || !dataLine) continue;
          try {
            events.push({
              type: eventLine.slice('event:'.length).trim(),
              data: JSON.parse(dataLine.slice('data:'.length).trim())
            });
          } catch (e) {
            // Best effort: skip a frame whose data is not valid JSON, keep the rest.
          }
        }

        // Rebuild content blocks by replaying start/delta/stop events in order.
        const content = [];
        let currentBlock = null;
        for (const event of events) {
          if (event.type === 'content_block_start') {
            currentBlock = { ...event.data?.content_block };
            if (currentBlock.type === 'thinking') {
              currentBlock.thinking = '';
              currentBlock.signature = '';
            }
            if (currentBlock.type === 'text') currentBlock.text = '';
          } else if (event.type === 'content_block_delta') {
            const delta = event.data?.delta;
            if (!currentBlock || !delta) continue;
            if (delta.type === 'thinking_delta') {
              currentBlock.thinking += delta.thinking || '';
            } else if (delta.type === 'signature_delta') {
              currentBlock.signature += delta.signature || '';
            } else if (delta.type === 'text_delta') {
              currentBlock.text += delta.text || '';
            } else if (delta.type === 'input_json_delta') {
              // Tool input arrives as JSON fragments; buffer until the block stops.
              currentBlock.partial_json = (currentBlock.partial_json || '') + (delta.partial_json || '');
            }
          } else if (event.type === 'content_block_stop') {
            if (currentBlock?.type === 'tool_use' && currentBlock.partial_json) {
              try {
                currentBlock.input = JSON.parse(currentBlock.partial_json);
              } catch (e) {
                // Incomplete or invalid tool JSON: keep the `input` from the start event.
              }
              delete currentBlock.partial_json;
            }
            if (currentBlock) content.push(currentBlock);
            currentBlock = null;
          }
        }

        const result = { content, events, statusCode: res.statusCode };
        const errorEvent = events.find(e => e.type === 'error');
        if (errorEvent) {
          result.error = errorEvent.data?.error;
        } else if (res.statusCode >= 400) {
          // A plain (non-SSE) error body would otherwise be silently dropped.
          result.error = describeHttpFailure(res.statusCode, fullData);
        }
        resolve(result);
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
// Multiple tools to encourage interleaved thinking.
// Each entry declares a tool name, a description, and a JSON-schema
// `input_schema`; the array is sent as the `tools` field of every request below.
const tools = [{
name: 'read_file',
description: 'Read a file',
input_schema: {
type: 'object',
properties: { path: { type: 'string' } },
required: ['path']
}
}, {
name: 'write_file',
description: 'Write to a file',
input_schema: {
type: 'object',
properties: {
path: { type: 'string' },
content: { type: 'string' }
},
required: ['path', 'content']
}
}, {
name: 'run_tests',
description: 'Run test suite',
input_schema: {
type: 'object',
properties: { pattern: { type: 'string' } },
required: ['pattern']
}
}];
/**
 * Run the interleaved-thinking scenarios, print a report, and exit the
 * process with status 0 when no scenario failed and 1 otherwise.
 *
 * Scenario 1 sends a multi-step task and expects at least one thinking block
 * with a signature of 50+ characters plus at least one tool call. Scenario 2
 * replays that assistant turn together with simulated tool output and expects
 * another thinking block followed by text or a further tool call. Scenario 2
 * is reported as SKIP, without failing the run, when scenario 1 produced no
 * tool call.
 */
async function runTests() {
  console.log('='.repeat(60));
  console.log('INTERLEAVED THINKING TEST');
  console.log('Tests complex multi-step reasoning with tools');
  console.log('='.repeat(60));
  console.log('');
  let allPassed = true;
  const results = [];

  // ===== TEST 1: Complex task requiring multiple steps =====
  console.log('TEST 1: Complex task - read, modify, write, test');
  console.log('-'.repeat(40));
  const result = await streamRequest({
    model: 'claude-opus-4-5-thinking',
    max_tokens: 8192,
    stream: true,
    tools,
    thinking: { type: 'enabled', budget_tokens: 16000 },
    messages: [{
      role: 'user',
      content: `I need you to:
1. Read the file src/config.js
2. Add a new config option "debug: true"
3. Write the updated file
4. Run the tests to make sure nothing broke
Please do this step by step, reading each file before modifying.`
    }]
  });
  if (result.error) {
    console.log(` ERROR: ${result.error.message}`);
    allPassed = false;
    results.push({ name: 'Complex multi-step task', passed: false });
  } else {
    const thinking = result.content.filter(b => b.type === 'thinking');
    const toolUse = result.content.filter(b => b.type === 'tool_use');
    const text = result.content.filter(b => b.type === 'text');
    console.log(` Thinking blocks: ${thinking.length}`);
    console.log(` Tool use blocks: ${toolUse.length}`);
    console.log(` Text blocks: ${text.length}`);
    // Check signatures
    const signedThinking = thinking.filter(t => t.signature && t.signature.length >= 50);
    console.log(` Signed thinking blocks: ${signedThinking.length}`);
    // Analyze block order
    const blockOrder = result.content.map(b => b.type).join(' -> ');
    console.log(` Block order: ${blockOrder}`);
    // Show thinking previews
    thinking.forEach((t, i) => {
      console.log(` Thinking ${i + 1}: "${(t.thinking || '').substring(0, 50)}..."`);
    });
    // Show tool calls
    toolUse.forEach((t, i) => {
      // JSON.stringify(undefined) returns undefined, which has no substring();
      // fall back to an empty object when a block carries no input.
      console.log(` Tool ${i + 1}: ${t.name}(${JSON.stringify(t.input ?? {}).substring(0, 50)}...)`);
    });
    // Expect at least one thinking block (ideally multiple for complex task)
    const passed = thinking.length >= 1 && signedThinking.length >= 1 && toolUse.length >= 1;
    results.push({ name: 'Thinking + Tools in complex task', passed });
    if (!passed) allPassed = false;
  }

  // ===== TEST 2: Multiple tool calls in sequence =====
  console.log('\nTEST 2: Tool result followed by more thinking');
  console.log('-'.repeat(40));
  // Start with previous result and add tool result
  const toolUseBlocks = (result.content || []).filter(b => b.type === 'tool_use');
  if (toolUseBlocks.length > 0) {
    const simulatedFile = `module.exports = {
port: 3000,
host: 'localhost',
debug: false
};`;
    // The whole assistant turn is replayed, so answer every tool_use block it
    // contains: the Anthropic tool-use documentation requires a tool_result for
    // each tool_use in the preceding assistant message. With a single tool call
    // this is the same request the test always sent.
    const toolResults = toolUseBlocks.map(block => ({
      type: 'tool_result',
      tool_use_id: block.id,
      content: simulatedFile
    }));
    const result2 = await streamRequest({
      model: 'claude-opus-4-5-thinking',
      max_tokens: 8192,
      stream: true,
      tools,
      thinking: { type: 'enabled', budget_tokens: 16000 },
      messages: [
        {
          role: 'user',
          content: `Read src/config.js and tell me if debug mode is enabled.`
        },
        { role: 'assistant', content: result.content },
        { role: 'user', content: toolResults }
      ]
    });
    if (result2.error) {
      console.log(` ERROR: ${result2.error.message}`);
      allPassed = false;
      results.push({ name: 'Thinking after tool result', passed: false });
    } else {
      const thinking2 = result2.content.filter(b => b.type === 'thinking');
      const text2 = result2.content.filter(b => b.type === 'text');
      const toolUse2 = result2.content.filter(b => b.type === 'tool_use');
      console.log(` Thinking blocks: ${thinking2.length}`);
      console.log(` Text blocks: ${text2.length}`);
      console.log(` Tool use blocks: ${toolUse2.length}`);
      if (text2.length > 0) {
        console.log(` Response: "${text2[0].text?.substring(0, 80)}..."`);
      }
      // Should have thinking after receiving tool result
      const passed = thinking2.length >= 1 && (text2.length > 0 || toolUse2.length > 0);
      results.push({ name: 'Thinking after tool result', passed });
      if (!passed) allPassed = false;
    }
  } else {
    // A skip is listed in the summary but deliberately leaves allPassed untouched.
    console.log(' SKIPPED - No tool use in previous test');
    results.push({ name: 'Thinking after tool result', passed: false, skipped: true });
  }

  // ===== Summary =====
  console.log('\n' + '='.repeat(60));
  console.log('SUMMARY');
  console.log('='.repeat(60));
  for (const entry of results) {
    const status = entry.skipped ? 'SKIP' : (entry.passed ? 'PASS' : 'FAIL');
    console.log(` [${status}] ${entry.name}`);
  }
  console.log('\n' + '='.repeat(60));
  console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
  console.log('='.repeat(60));
  process.exit(allPassed ? 0 : 1);
}
// Script entry point: run the scenarios. Any rejection escaping runTests()
// is logged and turned into exit status 1.
runTests().catch(err => {
console.error('Test failed with error:', err);
process.exit(1);
});

View File

@@ -0,0 +1,301 @@
/**
* Multi-Turn Tool Call Test (Streaming)
*
* Simulates Claude Code's streaming multi-turn conversation pattern.
* Same flow as non-streaming but verifies:
* - SSE events are properly formatted
* - signature_delta events are present
* - Thinking blocks accumulate correctly across deltas
*/
const http = require('http');
// Host and port that every request in this file is sent to.
const BASE_URL = 'localhost';
const PORT = 8080;
/**
 * POST `body` to `/v1/messages` on BASE_URL:PORT and collect the Server-Sent
 * Events (SSE) reply once the response has ended.
 *
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<{content: object[], events: {type: string, data: object}[], statusCode: number, raw: string, error?: object}>}
 *   `raw` is the undecoded SSE text; `events` holds every frame that had both
 *   an `event:` and a JSON `data:` line; `content` holds the blocks rebuilt
 *   from the content_block_start/delta/stop events, with each tool_use block's
 *   streamed JSON parsed into `input`. `error` is set when the stream
 *   contained an `error` event, or when the server answered with an HTTP
 *   status >= 400 without one. The promise rejects on a request- or
 *   response-level stream error.
 */
function streamRequest(body) {
  // Describe a failed HTTP response that carried no SSE `error` event.
  const describeHttpFailure = (statusCode, rawBody) => {
    try {
      const parsed = JSON.parse(rawBody);
      if (parsed && typeof parsed.error === 'object' && parsed.error !== null && parsed.error.message) {
        return parsed.error;
      }
    } catch (e) {
      // Body is not JSON; fall through to the generic description.
    }
    return { type: 'http_error', message: `HTTP ${statusCode}: ${rawBody.slice(0, 200)}` };
  };

  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const req = http.request({
      host: BASE_URL,
      port: PORT,
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'interleaved-thinking-2025-05-14',
        'Content-Length': Buffer.byteLength(data)
      }
    }, res => {
      // Decode at the stream level: calling toString() on each chunk corrupts
      // any multi-byte UTF-8 character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let fullData = '';
      res.on('data', chunk => {
        fullData += chunk;
      });
      // Without this the promise would never settle if the response stream fails.
      res.on('error', reject);
      res.on('end', () => {
        // Parse SSE events. Frames are separated by a blank line; accept LF and CRLF.
        const events = [];
        for (const frame of fullData.split(/\r?\n\r?\n/)) {
          if (!frame.trim()) continue;
          const lines = frame.split(/\r?\n/);
          const eventLine = lines.find(l => l.startsWith('event:'));
          const dataLine = lines.find(l => l.startsWith('data:'));
          if (!eventLine || !dataLine) continue;
          try {
            events.push({
              type: eventLine.slice('event:'.length).trim(),
              data: JSON.parse(dataLine.slice('data:'.length).trim())
            });
          } catch (e) {
            // Best effort: skip a frame whose data is not valid JSON, keep the rest.
          }
        }

        // Build content from events by replaying start/delta/stop in order.
        const content = [];
        let currentBlock = null;
        for (const event of events) {
          if (event.type === 'content_block_start') {
            currentBlock = { ...event.data?.content_block };
            if (currentBlock.type === 'thinking') {
              currentBlock.thinking = '';
              currentBlock.signature = '';
            }
            if (currentBlock.type === 'text') currentBlock.text = '';
          } else if (event.type === 'content_block_delta') {
            const delta = event.data?.delta;
            if (!currentBlock || !delta) continue;
            if (delta.type === 'thinking_delta') {
              currentBlock.thinking += delta.thinking || '';
            } else if (delta.type === 'signature_delta') {
              currentBlock.signature += delta.signature || '';
            } else if (delta.type === 'text_delta') {
              currentBlock.text += delta.text || '';
            } else if (delta.type === 'input_json_delta') {
              // Tool input arrives as JSON fragments; buffer until the block stops.
              currentBlock.partial_json = (currentBlock.partial_json || '') + (delta.partial_json || '');
            }
          } else if (event.type === 'content_block_stop') {
            if (currentBlock?.type === 'tool_use' && currentBlock.partial_json) {
              try {
                currentBlock.input = JSON.parse(currentBlock.partial_json);
              } catch (e) {
                // Incomplete or invalid tool JSON: keep the `input` from the start event.
              }
              delete currentBlock.partial_json;
            }
            if (currentBlock) content.push(currentBlock);
            currentBlock = null;
          }
        }

        // Check for errors
        const result = { content, events, statusCode: res.statusCode, raw: fullData };
        const errorEvent = events.find(e => e.type === 'error');
        if (errorEvent) {
          result.error = errorEvent.data?.error;
        } else if (res.statusCode >= 400) {
          // A plain (non-SSE) error body would otherwise be silently dropped.
          result.error = describeHttpFailure(res.statusCode, fullData);
        }
        resolve(result);
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
// Single tool definition (name, description, JSON-schema `input_schema`) sent
// as the `tools` field of every request in this file. Only `command` is
// required; `cwd` is optional.
const tools = [{
name: 'execute_command',
description: 'Execute a shell command',
input_schema: {
type: 'object',
properties: {
command: { type: 'string', description: 'Command to execute' },
cwd: { type: 'string', description: 'Working directory' }
},
required: ['command']
}
}];
/**
 * Partition response content blocks by type and derive summary flags.
 *
 * @param {object[]} content - Content blocks, each with a `type` field.
 * @returns {{thinking: object[], toolUse: object[], text: object[], hasThinking: boolean, hasToolUse: boolean, hasText: boolean, thinkingHasSignature: boolean}}
 *   The thinking, tool_use and text blocks in their original order, a
 *   presence flag for each group, and whether any thinking block has a
 *   signature of at least 50 characters.
 */
function analyzeContent(content) {
  const thinking = [];
  const toolUse = [];
  const text = [];

  // Single pass; blocks of any other type are ignored.
  content.forEach((block) => {
    switch (block.type) {
      case 'thinking':
        thinking.push(block);
        break;
      case 'tool_use':
        toolUse.push(block);
        break;
      case 'text':
        text.push(block);
        break;
      default:
        break;
    }
  });

  const signedIndex = thinking.findIndex(
    (block) => block.signature && block.signature.length >= 50
  );

  return {
    thinking,
    toolUse,
    text,
    hasThinking: thinking.length !== 0,
    hasToolUse: toolUse.length !== 0,
    hasText: text.length !== 0,
    thinkingHasSignature: signedIndex !== -1
  };
}
function analyzeEvents(events) {
return {
messageStart: events.filter(e => e.type === 'message_start').length,
blockStart: events.filter(e => e.type === 'content_block_start').length,
blockDelta: events.filter(e => e.type === 'content_block_delta').length,
blockStop: events.filter(e => e.type === 'content_block_stop').length,
messageDelta: events.filter(e => e.type === 'message_delta').length,
messageStop: events.filter(e => e.type === 'message_stop').length,
thinkingDeltas: events.filter(e => e.data?.delta?.type === 'thinking_delta').length,
signatureDeltas: events.filter(e => e.data?.delta?.type === 'signature_delta').length,
textDeltas: events.filter(e => e.data?.delta?.type === 'text_delta').length,
inputJsonDeltas: events.filter(e => e.data?.delta?.type === 'input_json_delta').length
};
}
/**
 * Run a two-turn streaming conversation, print a report, and exit the process
 * with status 0 when every recorded check passed and 1 otherwise.
 *
 * Turn 1 asks for a shell command and expects thinking with a signature of
 * 50+ characters, at least one signature_delta event, and a tool call. Turn 2
 * runs only when turn 1 produced a tool call: it replays that assistant turn
 * with simulated command output and expects thinking plus streamed text.
 */
async function runTests() {
  console.log('='.repeat(60));
  console.log('MULTI-TURN TOOL CALL TEST (STREAMING)');
  console.log('Simulates Claude Code streaming conversation');
  console.log('='.repeat(60));
  console.log('');
  const messages = [];
  let allPassed = true;
  const results = [];

  // ===== TURN 1: Initial request =====
  console.log('TURN 1: User asks to run a command');
  console.log('-'.repeat(40));
  messages.push({
    role: 'user',
    content: 'Run "ls -la" in the current directory and tell me what files exist.'
  });
  const turn1 = await streamRequest({
    model: 'claude-sonnet-4-5-thinking',
    max_tokens: 4096,
    stream: true,
    tools,
    thinking: { type: 'enabled', budget_tokens: 10000 },
    messages
  });
  if (turn1.error) {
    console.log(` ERROR: ${turn1.error.message}`);
    allPassed = false;
    results.push({ name: 'Turn 1: Streaming request', passed: false });
  } else {
    const content = analyzeContent(turn1.content);
    const events = analyzeEvents(turn1.events);
    console.log(' Content:');
    console.log(` Thinking: ${content.hasThinking ? 'YES' : 'NO'} (${content.thinking.length} blocks)`);
    console.log(` Signature: ${content.thinkingHasSignature ? 'YES' : 'NO'}`);
    console.log(` Tool Use: ${content.hasToolUse ? 'YES' : 'NO'} (${content.toolUse.length} calls)`);
    console.log(' Events:');
    console.log(` message_start: ${events.messageStart}`);
    console.log(` content_block_start/stop: ${events.blockStart}/${events.blockStop}`);
    console.log(` thinking_delta: ${events.thinkingDeltas}`);
    console.log(` signature_delta: ${events.signatureDeltas}`);
    console.log(` input_json_delta: ${events.inputJsonDeltas}`);
    if (content.hasThinking && content.thinking[0].thinking) {
      console.log(` Thinking: "${content.thinking[0].thinking.substring(0, 60)}..."`);
    }
    if (content.hasToolUse) {
      console.log(` Tool: ${content.toolUse[0].name}(${JSON.stringify(content.toolUse[0].input)})`);
    }
    const passed = content.hasThinking && content.thinkingHasSignature &&
      events.signatureDeltas > 0 && content.hasToolUse;
    results.push({ name: 'Turn 1: Thinking + Signature + Tool Use + Events', passed });
    if (!passed) allPassed = false;
    // Only continue the conversation when there is a tool call to answer.
    if (content.hasToolUse) {
      messages.push({ role: 'assistant', content: turn1.content });
    }
  }

  // ===== TURN 2: Provide tool result =====
  // Two messages means the assistant turn (with a tool call) was appended above.
  if (messages.length >= 2) {
    console.log('\nTURN 2: Provide command output, expect summary');
    console.log('-'.repeat(40));
    const lastAssistant = messages[messages.length - 1];
    const commandOutput = `total 32
drwxr-xr-x 10 user staff 320 Dec 19 10:00 .
drwxr-xr-x 5 user staff 160 Dec 19 09:00 ..
-rw-r--r-- 1 user staff 1024 Dec 19 10:00 package.json
-rw-r--r-- 1 user staff 2048 Dec 19 10:00 README.md
drwxr-xr-x 8 user staff 256 Dec 19 10:00 src
drwxr-xr-x 4 user staff 128 Dec 19 10:00 tests`;
    // The whole assistant turn is replayed, so answer every tool_use block it
    // contains: the Anthropic tool-use documentation requires a tool_result for
    // each tool_use in the preceding assistant message. With a single tool call
    // this is the same message the test always sent.
    const toolResults = lastAssistant.content
      .filter(b => b.type === 'tool_use')
      .map(block => ({
        type: 'tool_result',
        tool_use_id: block.id,
        content: commandOutput
      }));
    messages.push({ role: 'user', content: toolResults });
    const turn2 = await streamRequest({
      model: 'claude-sonnet-4-5-thinking',
      max_tokens: 4096,
      stream: true,
      tools,
      thinking: { type: 'enabled', budget_tokens: 10000 },
      messages
    });
    if (turn2.error) {
      console.log(` ERROR: ${turn2.error.message}`);
      allPassed = false;
      results.push({ name: 'Turn 2: After tool result', passed: false });
    } else {
      const content = analyzeContent(turn2.content);
      const events = analyzeEvents(turn2.events);
      console.log(' Content:');
      console.log(` Thinking: ${content.hasThinking ? 'YES' : 'NO'} (${content.thinking.length} blocks)`);
      console.log(` Signature: ${content.thinkingHasSignature ? 'YES' : 'NO'}`);
      console.log(` Text: ${content.hasText ? 'YES' : 'NO'}`);
      console.log(' Events:');
      console.log(` thinking_delta: ${events.thinkingDeltas}`);
      console.log(` signature_delta: ${events.signatureDeltas}`);
      console.log(` text_delta: ${events.textDeltas}`);
      if (content.hasText && content.text[0].text) {
        console.log(` Response: "${content.text[0].text.substring(0, 100)}..."`);
      }
      const passed = content.hasThinking && content.hasText && events.textDeltas > 0;
      results.push({ name: 'Turn 2: Thinking + Text response', passed });
      if (!passed) allPassed = false;
    }
  }

  // ===== Summary =====
  console.log('\n' + '='.repeat(60));
  console.log('SUMMARY');
  console.log('='.repeat(60));
  for (const result of results) {
    const status = result.passed ? 'PASS' : 'FAIL';
    console.log(` [${status}] ${result.name}`);
  }
  console.log('\n' + '='.repeat(60));
  console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
  console.log('='.repeat(60));
  process.exit(allPassed ? 0 : 1);
}
// Script entry point: run the conversation. Any rejection escaping runTests()
// is logged and turned into exit status 1.
runTests().catch(err => {
console.error('Test failed with error:', err);
process.exit(1);
});

View File

@@ -0,0 +1,278 @@
/**
* Multi-Turn Tool Call Test (Non-Streaming)
*
* Simulates Claude Code's actual multi-turn conversation pattern:
* 1. User asks question requiring tool
* 2. Assistant responds with thinking + tool_use
* 3. User provides tool_result
* 4. Assistant responds with thinking + final answer
*
* Key aspects tested:
* - Thinking blocks with signatures are preserved across turns
* - Tool use/result flow works correctly
* - Interleaved thinking with tools
*/
const http = require('http');
// Host and port that every request in this file is sent to.
const BASE_URL = 'localhost';
const PORT = 8080;
/**
 * POST `body` to `/v1/messages` on BASE_URL:PORT and parse the complete
 * (non-streaming) JSON reply.
 *
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<object>} The parsed response body with the HTTP status
 *   added as `statusCode`. Rejects when the body is not valid JSON (the error
 *   message includes the first 500 characters of the raw body) or on a
 *   request- or response-level stream error.
 */
function makeRequest(body) {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const req = http.request({
      host: BASE_URL,
      port: PORT,
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'interleaved-thinking-2025-05-14',
        'Content-Length': Buffer.byteLength(data)
      }
    }, res => {
      // Decode at the stream level: calling toString() on each chunk corrupts
      // any multi-byte UTF-8 character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let fullData = '';
      res.on('data', chunk => {
        fullData += chunk;
      });
      // Without this the promise would never settle if the response stream fails.
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(fullData);
        } catch (e) {
          reject(new Error(`Parse error: ${e.message}\nRaw: ${fullData.substring(0, 500)}`, { cause: e }));
          return;
        }
        resolve({ ...parsed, statusCode: res.statusCode });
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
// Tool definitions (name, description, JSON-schema `input_schema`) sent as the
// `tools` field of every request in this file: one tool to search for files
// by pattern and one to read a file by path.
const tools = [{
name: 'search_files',
description: 'Search for files matching a pattern',
input_schema: {
type: 'object',
properties: {
pattern: { type: 'string', description: 'Glob pattern to search' },
path: { type: 'string', description: 'Directory to search in' }
},
required: ['pattern']
}
}, {
name: 'read_file',
description: 'Read contents of a file',
input_schema: {
type: 'object',
properties: {
path: { type: 'string', description: 'Path to file' }
},
required: ['path']
}
}];
/**
 * Group response content blocks by type and summarize what is present.
 *
 * @param {object[]} content - Content blocks, each with a `type` field.
 * @returns {{thinking: object[], toolUse: object[], text: object[], hasThinking: boolean, hasToolUse: boolean, hasText: boolean, thinkingHasSignature: boolean}}
 *   The thinking, tool_use and text blocks in their original order, a
 *   presence flag for each group, and whether any thinking block has a
 *   signature of at least 50 characters.
 */
function analyzeContent(content) {
  // Select the blocks whose `type` equals `wanted`, preserving order.
  const pick = (wanted) => content.filter((block) => block.type === wanted);

  const groups = {
    thinking: pick('thinking'),
    toolUse: pick('tool_use'),
    text: pick('text')
  };

  let thinkingHasSignature = false;
  for (const block of groups.thinking) {
    if (block.signature && block.signature.length >= 50) {
      thinkingHasSignature = true;
      break;
    }
  }

  return {
    ...groups,
    hasThinking: groups.thinking.length > 0,
    hasToolUse: groups.toolUse.length > 0,
    hasText: groups.text.length > 0,
    thinkingHasSignature
  };
}
async function runTests() {
console.log('='.repeat(60));
console.log('MULTI-TURN TOOL CALL TEST (NON-STREAMING)');
console.log('Simulates Claude Code conversation pattern');
console.log('='.repeat(60));
console.log('');
let messages = [];
let allPassed = true;
const results = [];
// ===== TURN 1: Initial request =====
console.log('TURN 1: User asks to find and read a config file');
console.log('-'.repeat(40));
messages.push({
role: 'user',
content: 'Find the package.json file and tell me what dependencies it has. Use search_files first.'
});
const turn1 = await makeRequest({
model: 'claude-sonnet-4-5-thinking',
max_tokens: 4096,
stream: false,
tools,
thinking: { type: 'enabled', budget_tokens: 10000 },
messages
});
if (turn1.statusCode !== 200 || turn1.error) {
console.log(` ERROR: ${turn1.error?.message || `Status ${turn1.statusCode}`}`);
allPassed = false;
results.push({ name: 'Turn 1: Initial request', passed: false });
} else {
const analysis = analyzeContent(turn1.content || []);
console.log(` Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
console.log(` Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
console.log(` Tool Use: ${analysis.hasToolUse ? 'YES' : 'NO'} (${analysis.toolUse.length} calls)`);
console.log(` Text: ${analysis.hasText ? 'YES' : 'NO'}`);
if (analysis.hasThinking && analysis.thinking[0].thinking) {
console.log(` Thinking: "${analysis.thinking[0].thinking.substring(0, 60)}..."`);
}
if (analysis.hasToolUse) {
console.log(` Tool: ${analysis.toolUse[0].name}(${JSON.stringify(analysis.toolUse[0].input)})`);
}
const passed = analysis.hasThinking && analysis.thinkingHasSignature && analysis.hasToolUse;
results.push({ name: 'Turn 1: Thinking + Signature + Tool Use', passed });
if (!passed) allPassed = false;
// Prepare for turn 2
if (analysis.hasToolUse) {
messages.push({ role: 'assistant', content: turn1.content });
}
}
// ===== TURN 2: Provide tool result =====
if (messages.length >= 2) {
console.log('\nTURN 2: Provide tool result, expect another tool call');
console.log('-'.repeat(40));
const lastAssistant = messages[messages.length - 1];
const toolUseBlock = lastAssistant.content.find(b => b.type === 'tool_use');
messages.push({
role: 'user',
content: [{
type: 'tool_result',
tool_use_id: toolUseBlock.id,
content: 'Found files:\n- /project/package.json\n- /project/packages/core/package.json'
}]
});
const turn2 = await makeRequest({
model: 'claude-sonnet-4-5-thinking',
max_tokens: 4096,
stream: false,
tools,
thinking: { type: 'enabled', budget_tokens: 10000 },
messages
});
if (turn2.statusCode !== 200 || turn2.error) {
console.log(` ERROR: ${turn2.error?.message || `Status ${turn2.statusCode}`}`);
allPassed = false;
results.push({ name: 'Turn 2: After tool result', passed: false });
} else {
const analysis = analyzeContent(turn2.content || []);
console.log(` Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
console.log(` Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
console.log(` Tool Use: ${analysis.hasToolUse ? 'YES' : 'NO'} (${analysis.toolUse.length} calls)`);
console.log(` Text: ${analysis.hasText ? 'YES' : 'NO'}`);
if (analysis.hasThinking && analysis.thinking[0].thinking) {
console.log(` Thinking: "${analysis.thinking[0].thinking.substring(0, 60)}..."`);
}
if (analysis.hasToolUse) {
console.log(` Tool: ${analysis.toolUse[0].name}(${JSON.stringify(analysis.toolUse[0].input)})`);
}
// Either tool use (to read file) or text response is acceptable
const passed = analysis.hasThinking && (analysis.hasToolUse || analysis.hasText);
results.push({ name: 'Turn 2: Thinking + (Tool or Text)', passed });
if (!passed) allPassed = false;
if (analysis.hasToolUse) {
messages.push({ role: 'assistant', content: turn2.content });
}
}
}
// ===== TURN 3: Final tool result and response =====
if (messages.length >= 4) {
const lastAssistant = messages[messages.length - 1];
const toolUseBlock = lastAssistant.content?.find(b => b.type === 'tool_use');
if (toolUseBlock) {
console.log('\nTURN 3: Provide file content, expect final response');
console.log('-'.repeat(40));
messages.push({
role: 'user',
content: [{
type: 'tool_result',
tool_use_id: toolUseBlock.id,
content: JSON.stringify({
name: 'my-project',
dependencies: {
express: '^4.18.2',
cors: '^2.8.5'
}
}, null, 2)
}]
});
const turn3 = await makeRequest({
model: 'claude-sonnet-4-5-thinking',
max_tokens: 4096,
stream: false,
tools,
thinking: { type: 'enabled', budget_tokens: 10000 },
messages
});
if (turn3.statusCode !== 200 || turn3.error) {
console.log(` ERROR: ${turn3.error?.message || `Status ${turn3.statusCode}`}`);
allPassed = false;
results.push({ name: 'Turn 3: Final response', passed: false });
} else {
const analysis = analyzeContent(turn3.content || []);
console.log(` Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
console.log(` Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
console.log(` Text: ${analysis.hasText ? 'YES' : 'NO'}`);
if (analysis.hasText && analysis.text[0].text) {
console.log(` Response: "${analysis.text[0].text.substring(0, 100)}..."`);
}
const passed = analysis.hasThinking && analysis.hasText;
results.push({ name: 'Turn 3: Thinking + Text response', passed });
if (!passed) allPassed = false;
}
}
}
// ===== Summary =====
console.log('\n' + '='.repeat(60));
console.log('SUMMARY');
console.log('='.repeat(60));
for (const result of results) {
const status = result.passed ? 'PASS' : 'FAIL';
console.log(` [${status}] ${result.name}`);
}
console.log('\n' + '='.repeat(60));
console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
console.log('='.repeat(60));
process.exit(allPassed ? 0 : 1);
}
// Script entry point: start the run. Any rejection that escapes runTests()
// is printed and converted into a non-zero exit status.
runTests().catch((failure) => {
    console.error('Test failed with error:', failure);
    process.exit(1);
});

View File

@@ -0,0 +1,272 @@
/**
* Thinking Signature Test
*
* Tests that thinking blocks with signatures are properly handled in multi-turn
* conversations, simulating how Claude Code sends requests.
*
* Claude Code sends assistant messages with thinking blocks that include signatures.
* These signatures must be preserved and sent back to the API.
*/
const http = require('http');
// Host the test requests are sent to. Despite the name this is a bare host
// name, not a full URL: it is passed as the `host` option of http.request().
const BASE_URL = 'localhost';
// TCP port the test requests are sent to (the `port` option of http.request()).
const PORT = 8080;
/**
 * Split a raw Server-Sent Events payload into `{ type, data }` records.
 *
 * Only frames that carry both an `event:` and a `data:` line are kept, and
 * the data line must be valid JSON. CRLF and bare CR line endings are
 * normalised to LF first so that frames are recognised regardless of which
 * line terminator was used.
 *
 * @param {string} raw - Complete response body text.
 * @returns {Array<{type: string, data: *}>} Parsed events in arrival order.
 */
function parseSseEvents(raw) {
    const events = [];
    const frames = raw.replace(/\r\n?/g, '\n').split('\n\n');
    for (const frame of frames) {
        if (!frame.trim()) continue;
        const lines = frame.split('\n');
        const eventLine = lines.find(line => line.startsWith('event:'));
        const dataLine = lines.find(line => line.startsWith('data:'));
        if (!eventLine || !dataLine) continue;
        try {
            const type = eventLine.slice('event:'.length).trim();
            const data = JSON.parse(dataLine.slice('data:'.length).trim());
            events.push({ type, data });
        } catch (err) {
            // Deliberately best-effort: a frame whose data is not JSON is
            // skipped so the remaining events can still be inspected.
        }
    }
    return events;
}

/**
 * Rebuild the list of content blocks from parsed stream events.
 *
 * A block is opened by `content_block_start`, extended by
 * `content_block_delta` events and appended to the result on
 * `content_block_stop`. A block that never receives its stop event is
 * not included.
 *
 * @param {Array<{type: string, data: *}>} events - Output of parseSseEvents.
 * @returns {Array<Object>} Assembled content blocks.
 */
function buildContentBlocks(events) {
    const content = [];
    let currentBlock = null;
    for (const event of events) {
        if (event.type === 'content_block_start') {
            currentBlock = { ...event.data?.content_block };
            if (currentBlock.type === 'thinking') {
                currentBlock.thinking = '';
                currentBlock.signature = '';
            }
            if (currentBlock.type === 'text') currentBlock.text = '';
        } else if (event.type === 'content_block_delta') {
            const delta = event.data?.delta;
            // A delta without payload, or one arriving outside a block, has
            // nothing to contribute.
            if (!delta || !currentBlock) continue;
            if (delta.type === 'thinking_delta') {
                currentBlock.thinking += delta.thinking || '';
            }
            if (delta.type === 'signature_delta') {
                currentBlock.signature += delta.signature || '';
            }
            if (delta.type === 'text_delta') {
                currentBlock.text += delta.text || '';
            }
            if (delta.type === 'input_json_delta') {
                // `|| ''` keeps a delta without partial_json from appending
                // the literal text "undefined" and breaking the JSON.
                currentBlock.partial_json = (currentBlock.partial_json || '') + (delta.partial_json || '');
            }
        } else if (event.type === 'content_block_stop') {
            if (currentBlock?.type === 'tool_use' && currentBlock.partial_json) {
                try {
                    currentBlock.input = JSON.parse(currentBlock.partial_json);
                } catch (err) {
                    // Best-effort: keep whatever `input` the start event
                    // carried when the streamed JSON is incomplete.
                }
                delete currentBlock.partial_json;
            }
            if (currentBlock) content.push(currentBlock);
            currentBlock = null;
        }
    }
    return content;
}

/**
 * POST `body` as JSON to `/v1/messages` and collect the streamed reply.
 *
 * The whole response is buffered, decoded once as UTF-8 (so multi-byte
 * characters split across network chunks stay intact), parsed as
 * Server-Sent Events and folded into content blocks.
 *
 * @param {Object} body - Request payload; serialised with JSON.stringify.
 * @returns {Promise<{content: Array<Object>, events: Array<{type: string, data: *}>, statusCode: number, raw: string}>}
 *   Resolves once the response has ended. Rejects when the request or the
 *   response stream emits an error, or when processing the events throws.
 */
function streamRequest(body) {
    return new Promise((resolve, reject) => {
        const data = JSON.stringify(body);
        const req = http.request({
            host: BASE_URL,
            port: PORT,
            path: '/v1/messages',
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': 'test',
                'anthropic-version': '2023-06-01',
                'anthropic-beta': 'interleaved-thinking-2025-05-14',
                'Content-Length': Buffer.byteLength(data)
            }
        }, res => {
            const chunks = [];
            res.on('data', chunk => {
                chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
            });
            res.on('error', reject);
            res.on('end', () => {
                // Anything thrown here would otherwise surface as an
                // uncaught exception and leave the promise pending.
                try {
                    const fullData = Buffer.concat(chunks).toString('utf8');
                    const events = parseSseEvents(fullData);
                    const content = buildContentBlocks(events);
                    resolve({ content, events, statusCode: res.statusCode, raw: fullData });
                } catch (err) {
                    reject(err);
                }
            });
        });
        req.on('error', reject);
        req.write(data);
        req.end();
    });
}
/**
 * Tool definitions sent with every request in this script: a single
 * `get_weather` tool that takes one required string argument, `location`.
 */
const tools = [
    {
        name: 'get_weather',
        description: 'Get the current weather for a location',
        input_schema: {
            type: 'object',
            properties: {
                location: {
                    type: 'string',
                    description: 'City name'
                }
            },
            required: ['location']
        }
    }
];
/**
 * Runs the thinking-signature scenario and terminates the process.
 *
 * Three checks are performed:
 *   1. A first streamed request must yield a thinking block whose signature
 *      is at least 50 characters long, plus a tool_use block.
 *   2. The first turn's content is sent back verbatim as the assistant
 *      message together with a tool_result; the reply must contain no
 *      `error` event and at least one thinking or text block. This check is
 *      reported as skipped when turn 1 produced no tool_use block.
 *   3. The first turn's event stream must contain at least one
 *      `signature_delta` event.
 *
 * A summary is printed and the process exits with 0 when every executed
 * check passed, 1 otherwise.
 *
 * @returns {Promise<void>} Never resolves normally: process.exit is called.
 */
async function runTests() {
    const banner = '='.repeat(60);
    const divider = '-'.repeat(40);

    // Every request shares the same envelope; only the messages differ.
    const send = (messages) => streamRequest({
        model: 'claude-sonnet-4-5-thinking',
        max_tokens: 4096,
        stream: true,
        tools,
        thinking: { type: 'enabled', budget_tokens: 10000 },
        messages
    });
    const ofType = (blocks, wanted) => blocks.filter(block => block.type === wanted);
    const verdict = (ok) => (ok ? 'PASS' : 'FAIL');

    const results = [];
    let allPassed = true;
    // Stores an executed check, prints its outcome and tracks overall status.
    const record = (name, passed) => {
        results.push({ name, passed });
        console.log(` Result: ${verdict(passed)}`);
        if (!passed) {
            allPassed = false;
        }
    };

    console.log(banner);
    console.log('THINKING SIGNATURE TEST');
    console.log('Simulates Claude Code multi-turn with thinking blocks');
    console.log(banner);
    console.log('');

    // ----- TEST 1: first turn must produce thinking + signature + tool use -----
    console.log('TEST 1: Initial request with thinking model');
    console.log(divider);

    const openingMessages = [
        { role: 'user', content: 'What is the weather in Paris? Use the get_weather tool.' }
    ];
    const firstTurn = await send(openingMessages);
    const firstThinking = ofType(firstTurn.content, 'thinking');
    const firstToolUse = ofType(firstTurn.content, 'tool_use');
    const firstText = ofType(firstTurn.content, 'text');

    console.log(` Thinking blocks: ${firstThinking.length}`);
    console.log(` Tool use blocks: ${firstToolUse.length}`);
    console.log(` Text blocks: ${firstText.length}`);

    let firstHasSignature = false;
    if (firstThinking.length > 0) {
        const leading = firstThinking[0];
        const signature = leading.signature || '';
        firstHasSignature = signature.length >= 50;
        console.log(` Signature length: ${signature.length} chars`);
        console.log(` Signature present: ${firstHasSignature ? 'YES' : 'NO'}`);
        if (leading.thinking) {
            console.log(` Thinking preview: "${leading.thinking.substring(0, 80)}..."`);
        }
    }
    record(
        'Turn 1: Thinking + Signature + Tool Use',
        firstThinking.length > 0 && firstHasSignature && firstToolUse.length > 0
    );

    // ----- TEST 2: replay the signed thinking block in a follow-up turn -----
    console.log('\nTEST 2: Multi-turn with thinking signature in assistant message');
    console.log(divider);

    if (firstToolUse.length > 0) {
        // The assistant message is the first turn's content, signature included.
        const replayedContent = firstTurn.content;
        const replayedThinking = replayedContent.find(block => block.type === 'thinking');
        if (replayedThinking) {
            console.log(` Sending thinking with signature: ${(replayedThinking.signature || '').length} chars`);
        }

        const toolResult = {
            type: 'tool_result',
            tool_use_id: firstToolUse[0].id,
            content: 'The weather in Paris is 18°C and sunny.'
        };
        const secondTurn = await send([
            ...openingMessages,
            { role: 'assistant', content: replayedContent },
            { role: 'user', content: [toolResult] }
        ]);
        const secondThinking = ofType(secondTurn.content, 'thinking');
        const secondText = ofType(secondTurn.content, 'text');

        console.log(` Thinking blocks: ${secondThinking.length}`);
        console.log(` Text blocks: ${secondText.length}`);

        const errorEvent = secondTurn.events.find(event => event.type === 'error');
        const hasError = errorEvent !== undefined;
        if (hasError) {
            console.log(` ERROR: ${errorEvent?.data?.error?.message || 'Unknown error'}`);
        }

        if (secondThinking.length > 0) {
            const leading = secondThinking[0];
            console.log(` New signature length: ${(leading.signature || '').length} chars`);
            if (leading.thinking) {
                console.log(` Thinking preview: "${leading.thinking.substring(0, 80)}..."`);
            }
        }
        if (secondText.length > 0 && secondText[0].text) {
            console.log(` Response: "${secondText[0].text.substring(0, 100)}..."`);
        }

        record(
            'Turn 2: Multi-turn with signature',
            !hasError && (secondThinking.length > 0 || secondText.length > 0)
        );
    } else {
        console.log(' SKIPPED - No tool use in turn 1');
        results.push({ name: 'Turn 2: Multi-turn with signature', passed: false, skipped: true });
    }

    // ----- TEST 3: the first stream must carry signature_delta events -----
    console.log('\nTEST 3: Verify signature_delta events in stream');
    console.log(divider);

    const signatureDeltas = firstTurn.events.filter(
        ({ type, data }) => type === 'content_block_delta' && data?.delta?.type === 'signature_delta'
    );
    console.log(` signature_delta events: ${signatureDeltas.length}`);
    if (signatureDeltas.length > 0) {
        let totalSigLength = 0;
        for (const { data } of signatureDeltas) {
            totalSigLength += data.delta.signature?.length || 0;
        }
        console.log(` Total signature length from deltas: ${totalSigLength} chars`);
    }
    record('signature_delta events present', signatureDeltas.length > 0);

    // ----- Summary -----
    console.log('\n' + banner);
    console.log('SUMMARY');
    console.log(banner);
    for (const { name, passed, skipped } of results) {
        const status = skipped ? 'SKIP' : verdict(passed);
        console.log(` [${status}] ${name}`);
    }
    console.log('\n' + banner);
    console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
    console.log(banner);

    process.exit(allPassed ? 0 : 1);
}
// Script entry point: run the scenario. A rejection that escapes runTests()
// (for example a refused connection) is logged and reported as exit status 1.
runTests().catch((failure) => {
    console.error('Test failed with error:', failure);
    process.exit(1);
});

BIN
tests/utils/test_image.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB