initial commit

2025-12-19 19:20:28 +05:30
parent 52d72b7bff
commit 5ae29947b1
18 changed files with 3925 additions and 494 deletions
--- a/tests/test-multiturn-thinking-tools.cjs
+++ b/tests/test-multiturn-thinking-tools.cjs
@@ -0,0 +1,278 @@
+/**
+ * Multi-Turn Tool Call Test (Non-Streaming)
+ *
+ * Simulates Claude Code's actual multi-turn conversation pattern:
+ * 1. User asks question requiring tool
+ * 2. Assistant responds with thinking + tool_use
+ * 3. User provides tool_result
+ * 4. Assistant responds with thinking + final answer
+ *
+ * Key aspects tested:
+ * - Thinking blocks with signatures are preserved across turns
+ * - Tool use/result flow works correctly
+ * - Interleaved thinking with tools
+ */
+const http = require('http');
+
+// Host name of the server under test. Despite the name, this is passed as the
+// `host` option of http.request, so it must be a bare host, not a full URL.
+const BASE_URL = 'localhost';
+// TCP port of the server under test.
+const PORT = 8080;
+
+function makeRequest(body) {
+    return new Promise((resolve, reject) => {
+        const data = JSON.stringify(body);
+        const req = http.request({
+            host: BASE_URL,
+            port: PORT,
+            path: '/v1/messages',
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                'x-api-key': 'test',
+                'anthropic-version': '2023-06-01',
+                'anthropic-beta': 'interleaved-thinking-2025-05-14',
+                'Content-Length': Buffer.byteLength(data)
+            }
+        }, res => {
+            let fullData = '';
+            res.on('data', chunk => fullData += chunk.toString());
+            res.on('end', () => {
+                try {
+                    const parsed = JSON.parse(fullData);
+                    resolve({ ...parsed, statusCode: res.statusCode });
+                } catch (e) {
+                    reject(new Error(`Parse error: ${e.message}\nRaw: ${fullData.substring(0, 500)}`));
+                }
+            });
+        });
+        req.on('error', reject);
+        req.write(data);
+        req.end();
+    });
+}
+
+const tools = [{
+    name: 'search_files',
+    description: 'Search for files matching a pattern',
+    input_schema: {
+        type: 'object',
+        properties: {
+            pattern: { type: 'string', description: 'Glob pattern to search' },
+            path: { type: 'string', description: 'Directory to search in' }
+        },
+        required: ['pattern']
+    }
+}, {
+    name: 'read_file',
+    description: 'Read contents of a file',
+    input_schema: {
+        type: 'object',
+        properties: {
+            path: { type: 'string', description: 'Path to file' }
+        },
+        required: ['path']
+    }
+}];
+
+function analyzeContent(content) {
+    const thinking = content.filter(b => b.type === 'thinking');
+    const toolUse = content.filter(b => b.type === 'tool_use');
+    const text = content.filter(b => b.type === 'text');
+
+    return {
+        thinking,
+        toolUse,
+        text,
+        hasThinking: thinking.length > 0,
+        hasToolUse: toolUse.length > 0,
+        hasText: text.length > 0,
+        thinkingHasSignature: thinking.some(t => t.signature && t.signature.length >= 50)
+    };
+}
+
+async function runTests() {
+    console.log('='.repeat(60));
+    console.log('MULTI-TURN TOOL CALL TEST (NON-STREAMING)');
+    console.log('Simulates Claude Code conversation pattern');
+    console.log('='.repeat(60));
+    console.log('');
+
+    let messages = [];
+    let allPassed = true;
+    const results = [];
+
+    // ===== TURN 1: Initial request =====
+    console.log('TURN 1: User asks to find and read a config file');
+    console.log('-'.repeat(40));
+
+    messages.push({
+        role: 'user',
+        content: 'Find the package.json file and tell me what dependencies it has. Use search_files first.'
+    });
+
+    const turn1 = await makeRequest({
+        model: 'claude-sonnet-4-5-thinking',
+        max_tokens: 4096,
+        stream: false,
+        tools,
+        thinking: { type: 'enabled', budget_tokens: 10000 },
+        messages
+    });
+
+    if (turn1.statusCode !== 200 || turn1.error) {
+        console.log(`  ERROR: ${turn1.error?.message || `Status ${turn1.statusCode}`}`);
+        allPassed = false;
+        results.push({ name: 'Turn 1: Initial request', passed: false });
+    } else {
+        const analysis = analyzeContent(turn1.content || []);
+        console.log(`  Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
+        console.log(`  Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
+        console.log(`  Tool Use: ${analysis.hasToolUse ? 'YES' : 'NO'} (${analysis.toolUse.length} calls)`);
+        console.log(`  Text: ${analysis.hasText ? 'YES' : 'NO'}`);
+
+        if (analysis.hasThinking && analysis.thinking[0].thinking) {
+            console.log(`  Thinking: "${analysis.thinking[0].thinking.substring(0, 60)}..."`);
+        }
+        if (analysis.hasToolUse) {
+            console.log(`  Tool: ${analysis.toolUse[0].name}(${JSON.stringify(analysis.toolUse[0].input)})`);
+        }
+
+        const passed = analysis.hasThinking && analysis.thinkingHasSignature && analysis.hasToolUse;
+        results.push({ name: 'Turn 1: Thinking + Signature + Tool Use', passed });
+        if (!passed) allPassed = false;
+
+        // Prepare for turn 2
+        if (analysis.hasToolUse) {
+            messages.push({ role: 'assistant', content: turn1.content });
+        }
+    }
+
+    // ===== TURN 2: Provide tool result =====
+    if (messages.length >= 2) {
+        console.log('\nTURN 2: Provide tool result, expect another tool call');
+        console.log('-'.repeat(40));
+
+        const lastAssistant = messages[messages.length - 1];
+        const toolUseBlock = lastAssistant.content.find(b => b.type === 'tool_use');
+
+        messages.push({
+            role: 'user',
+            content: [{
+                type: 'tool_result',
+                tool_use_id: toolUseBlock.id,
+                content: 'Found files:\n- /project/package.json\n- /project/packages/core/package.json'
+            }]
+        });
+
+        const turn2 = await makeRequest({
+            model: 'claude-sonnet-4-5-thinking',
+            max_tokens: 4096,
+            stream: false,
+            tools,
+            thinking: { type: 'enabled', budget_tokens: 10000 },
+            messages
+        });
+
+        if (turn2.statusCode !== 200 || turn2.error) {
+            console.log(`  ERROR: ${turn2.error?.message || `Status ${turn2.statusCode}`}`);
+            allPassed = false;
+            results.push({ name: 'Turn 2: After tool result', passed: false });
+        } else {
+            const analysis = analyzeContent(turn2.content || []);
+            console.log(`  Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
+            console.log(`  Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
+            console.log(`  Tool Use: ${analysis.hasToolUse ? 'YES' : 'NO'} (${analysis.toolUse.length} calls)`);
+            console.log(`  Text: ${analysis.hasText ? 'YES' : 'NO'}`);
+
+            if (analysis.hasThinking && analysis.thinking[0].thinking) {
+                console.log(`  Thinking: "${analysis.thinking[0].thinking.substring(0, 60)}..."`);
+            }
+            if (analysis.hasToolUse) {
+                console.log(`  Tool: ${analysis.toolUse[0].name}(${JSON.stringify(analysis.toolUse[0].input)})`);
+            }
+
+            // Either tool use (to read file) or text response is acceptable
+            const passed = analysis.hasThinking && (analysis.hasToolUse || analysis.hasText);
+            results.push({ name: 'Turn 2: Thinking + (Tool or Text)', passed });
+            if (!passed) allPassed = false;
+
+            if (analysis.hasToolUse) {
+                messages.push({ role: 'assistant', content: turn2.content });
+            }
+        }
+    }
+
+    // ===== TURN 3: Final tool result and response =====
+    if (messages.length >= 4) {
+        const lastAssistant = messages[messages.length - 1];
+        const toolUseBlock = lastAssistant.content?.find(b => b.type === 'tool_use');
+
+        if (toolUseBlock) {
+            console.log('\nTURN 3: Provide file content, expect final response');
+            console.log('-'.repeat(40));
+
+            messages.push({
+                role: 'user',
+                content: [{
+                    type: 'tool_result',
+                    tool_use_id: toolUseBlock.id,
+                    content: JSON.stringify({
+                        name: 'my-project',
+                        dependencies: {
+                            express: '^4.18.2',
+                            cors: '^2.8.5'
+                        }
+                    }, null, 2)
+                }]
+            });
+
+            const turn3 = await makeRequest({
+                model: 'claude-sonnet-4-5-thinking',
+                max_tokens: 4096,
+                stream: false,
+                tools,
+                thinking: { type: 'enabled', budget_tokens: 10000 },
+                messages
+            });
+
+            if (turn3.statusCode !== 200 || turn3.error) {
+                console.log(`  ERROR: ${turn3.error?.message || `Status ${turn3.statusCode}`}`);
+                allPassed = false;
+                results.push({ name: 'Turn 3: Final response', passed: false });
+            } else {
+                const analysis = analyzeContent(turn3.content || []);
+                console.log(`  Thinking: ${analysis.hasThinking ? 'YES' : 'NO'} (${analysis.thinking.length} blocks)`);
+                console.log(`  Signature: ${analysis.thinkingHasSignature ? 'YES' : 'NO'}`);
+                console.log(`  Text: ${analysis.hasText ? 'YES' : 'NO'}`);
+
+                if (analysis.hasText && analysis.text[0].text) {
+                    console.log(`  Response: "${analysis.text[0].text.substring(0, 100)}..."`);
+                }
+
+                const passed = analysis.hasThinking && analysis.hasText;
+                results.push({ name: 'Turn 3: Thinking + Text response', passed });
+                if (!passed) allPassed = false;
+            }
+        }
+    }
+
+    // ===== Summary =====
+    console.log('\n' + '='.repeat(60));
+    console.log('SUMMARY');
+    console.log('='.repeat(60));
+
+    for (const result of results) {
+        const status = result.passed ? 'PASS' : 'FAIL';
+        console.log(`  [${status}] ${result.name}`);
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
+    console.log('='.repeat(60));
+
+    process.exit(allPassed ? 0 : 1);
+}
+
+runTests().catch(err => {
+    console.error('Test failed with error:', err);
+    process.exit(1);
+});