Added support for Gemini models

This commit is contained in:
Badri Narayanan S
2025-12-27 14:09:20 +05:30
parent 9b7dcf3a6c
commit c1e1dbb0ef
13 changed files with 641 additions and 176 deletions

View File

@@ -6,14 +6,18 @@
* - SSE events are properly formatted
* - signature_delta events are present
* - Thinking blocks accumulate correctly across deltas
*
* Runs for both Claude and Gemini model families.
*/
const { streamRequest, analyzeContent, analyzeEvents, commonTools } = require('./helpers/http-client.cjs');
const { getTestModels, getModelConfig } = require('./helpers/test-models.cjs');
const tools = [commonTools.executeCommand];
async function runTests() {
async function runTestsForModel(family, model) {
console.log('='.repeat(60));
console.log('MULTI-TURN TOOL CALL TEST (STREAMING)');
console.log(`MULTI-TURN TOOL CALL TEST [${family.toUpperCase()}]`);
console.log(`Model: ${model}`);
console.log('Simulates Claude Code streaming conversation');
console.log('='.repeat(60));
console.log('');
@@ -21,6 +25,7 @@ async function runTests() {
let messages = [];
let allPassed = true;
const results = [];
const modelConfig = getModelConfig(family);
// ===== TURN 1: Initial request =====
console.log('TURN 1: User asks to run a command');
@@ -32,11 +37,11 @@ async function runTests() {
});
const turn1 = await streamRequest({
model: 'claude-sonnet-4-5-thinking',
max_tokens: 16000,
model,
max_tokens: modelConfig.max_tokens,
stream: true,
tools,
thinking: { type: 'enabled', budget_tokens: 10000 },
thinking: modelConfig.thinking,
messages
});
@@ -50,7 +55,7 @@ async function runTests() {
console.log(' Content:');
console.log(` Thinking: ${content.hasThinking ? 'YES' : 'NO'} (${content.thinking.length} blocks)`);
console.log(` Signature: ${content.thinkingHasSignature ? 'YES' : 'NO'}`);
console.log(` Signature: ${content.hasSignature ? 'YES' : 'NO'}`);
console.log(` Tool Use: ${content.hasToolUse ? 'YES' : 'NO'} (${content.toolUse.length} calls)`);
console.log(' Events:');
@@ -67,9 +72,11 @@ async function runTests() {
console.log(` Tool: ${content.toolUse[0].name}(${JSON.stringify(content.toolUse[0].input)})`);
}
const passed = content.hasThinking && content.thinkingHasSignature &&
events.signatureDeltas > 0 && content.hasToolUse;
results.push({ name: 'Turn 1: Thinking + Signature + Tool Use + Events', passed });
// For Claude: signature is on thinking block and comes via signature_delta events
// For Gemini: signature is on tool_use block (no signature_delta events)
const hasSignature = content.hasSignature || events.signatureDeltas > 0;
const passed = content.hasThinking && hasSignature && content.hasToolUse;
results.push({ name: 'Turn 1: Thinking + Signature + Tool Use', passed });
if (!passed) allPassed = false;
if (content.hasToolUse) {
@@ -101,11 +108,11 @@ drwxr-xr-x 4 user staff 128 Dec 19 10:00 tests`
});
const turn2 = await streamRequest({
model: 'claude-sonnet-4-5-thinking',
max_tokens: 16000,
model,
max_tokens: modelConfig.max_tokens,
stream: true,
tools,
thinking: { type: 'enabled', budget_tokens: 10000 },
thinking: modelConfig.thinking,
messages
});
@@ -119,7 +126,7 @@ drwxr-xr-x 4 user staff 128 Dec 19 10:00 tests`
console.log(' Content:');
console.log(` Thinking: ${content.hasThinking ? 'YES' : 'NO'} (${content.thinking.length} blocks)`);
console.log(` Signature: ${content.thinkingHasSignature ? 'YES' : 'NO'}`);
console.log(` Signature: ${content.hasSignature ? 'YES' : 'NO'}`);
console.log(` Text: ${content.hasText ? 'YES' : 'NO'}`);
console.log(' Events:');
@@ -139,7 +146,7 @@ drwxr-xr-x 4 user staff 128 Dec 19 10:00 tests`
// ===== Summary =====
console.log('\n' + '='.repeat(60));
console.log('SUMMARY');
console.log(`SUMMARY [${family.toUpperCase()}]`);
console.log('='.repeat(60));
for (const result of results) {
@@ -148,7 +155,26 @@ drwxr-xr-x 4 user staff 128 Dec 19 10:00 tests`
}
console.log('\n' + '='.repeat(60));
console.log(`OVERALL: ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
console.log(`[${family.toUpperCase()}] ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
console.log('='.repeat(60));
return allPassed;
}
async function runTests() {
const models = getTestModels();
let allPassed = true;
for (const { family, model } of models) {
console.log('\n');
const passed = await runTestsForModel(family, model);
if (!passed) allPassed = false;
}
console.log('\n' + '='.repeat(60));
console.log('FINAL RESULT');
console.log('='.repeat(60));
console.log(`Overall: ${allPassed ? 'ALL MODEL FAMILIES PASSED' : 'SOME MODEL FAMILIES FAILED'}`);
console.log('='.repeat(60));
process.exit(allPassed ? 0 : 1);