Files
antigravity-claude-proxy/tests/test-caching-streaming.cjs
Badri Narayanan S 12d196f6a0 refactor: centralize TEST_MODELS and DEFAULT_PRESETS in constants.js
- Move TEST_MODELS and DEFAULT_PRESETS to src/constants.js as single source of truth
- Update test-models.cjs helper to use dynamic import from constants
- Make getTestModels() and getModels() async functions
- Update all test files to await async model config loading
- Remove duplicate THINKING_MODELS and getThinkingModels() from test helper
- Make thinking tests more lenient for Gemini (doesn't always produce thinking blocks)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-13 19:20:57 +05:30

198 lines
7.1 KiB
JavaScript

/**
* Prompt Caching Test (Streaming)
*
* Verifies that prompt caching is working correctly:
* - Session ID is stable across turns (derived from first user message)
* - cache_read_input_tokens is returned in usage metadata
* - Second turn in same conversation should hit cache
*
* Runs for both Claude and Gemini model families.
*/
const { streamRequest, analyzeContent, extractUsage } = require('./helpers/http-client.cjs');
const { getTestModels, getModelConfig } = require('./helpers/test-models.cjs');
// Prompt caching only applies above a 1024-token minimum, so the system prompt
// is padded with filler lines. The result (~36KB) uses the same format as the
// working direct API test.
const LARGE_SYSTEM_PROMPT = [
    'You are an expert software engineer. Here is important context:\n',
    '// Large codebase file content line\n'.repeat(1000)
].join('');
/**
 * Runs the two-turn prompt caching scenario against one model and prints a
 * per-check summary.
 *
 * Turn 1 sends a single user message together with the large system prompt.
 * Turn 2 replays that conversation (including turn 1's assistant content) plus
 * a follow-up question, and reports whether any cache read tokens came back.
 * A cache miss is reported but never counted as a failure.
 *
 * If turn 1 does not return HTTP 200 there is no assistant reply to continue
 * from, so turn 2 is skipped and recorded as failed instead of being sent
 * with a missing assistant message.
 *
 * @param {string} family - Model family label; upper-cased for the banners and
 *   passed to getModelConfig().
 * @param {string} model - Model identifier sent in each request.
 * @returns {Promise<boolean>} true when every non-informational check passed.
 */
async function runTestsForModel(family, model) {
    const familyLabel = family.toUpperCase();
    const banner = '='.repeat(60);
    const rule = '-'.repeat(40);

    console.log(banner);
    console.log(`PROMPT CACHING TEST [${familyLabel}]`);
    console.log(`Model: ${model}`);
    console.log('Verifies session ID stability and cache token reporting');
    console.log(banner);
    console.log('');

    let allPassed = true;
    const results = [];
    // NOTE(review): used synchronously here, while getTestModels() is awaited
    // in runTests() - confirm getModelConfig() is still a synchronous helper.
    const modelConfig = getModelConfig(family);

    // ===== TURN 1: Initial request =====
    console.log('TURN 1: Initial request (establishes cache)');
    console.log(rule);

    const turn1Messages = [
        {
            role: 'user',
            content: 'Hello! Tell me briefly about JavaScript in one sentence.'
        }
    ];
    const turn1 = await sendCachingTurn(model, modelConfig, turn1Messages);
    const turn1Succeeded = turn1.statusCode === 200;

    if (!turn1Succeeded) {
        console.log(` ERROR: Status ${turn1.statusCode}`);
        allPassed = false;
        results.push({ name: 'Turn 1: Initial request', passed: false });
    } else {
        const content = analyzeContent(turn1.content);
        const usage = extractUsage(turn1.events);
        logTurnDetails(content, usage);

        // Turn 1 must produce text and report a positive input token count.
        const passed = content.hasText && usage.input_tokens > 0;
        results.push({ name: 'Turn 1: Has response and usage', passed });
        if (!passed) allPassed = false;
    }

    // ===== TURN 2: Follow-up request (should hit cache) =====
    console.log('\nTURN 2: Follow-up request (should use cache)');
    console.log(rule);

    if (!turn1Succeeded) {
        // Without a turn 1 reply the follow-up conversation cannot be built;
        // allPassed is already false from the turn 1 failure above.
        console.log(' SKIPPED: Turn 1 failed, no assistant reply to continue from');
        results.push({
            name: 'Turn 2: Follow-up request',
            passed: false,
            info: 'skipped: turn 1 failed'
        });
    } else {
        // Same first user message as turn 1, followed by turn 1's reply and
        // the follow-up question.
        const turn2Messages = [
            ...turn1Messages,
            {
                role: 'assistant',
                content: turn1.content
            },
            {
                role: 'user',
                content: 'Now tell me about Python in one sentence.'
            }
        ];
        const turn2 = await sendCachingTurn(model, modelConfig, turn2Messages);

        if (turn2.statusCode !== 200) {
            console.log(` ERROR: Status ${turn2.statusCode}`);
            allPassed = false;
            results.push({ name: 'Turn 2: Follow-up request', passed: false });
        } else {
            const content = analyzeContent(turn2.content);
            const usage = extractUsage(turn2.events);
            logTurnDetails(content, usage);

            const cacheHit = usage.cache_read_input_tokens > 0;
            if (cacheHit) {
                console.log(` CACHE HIT: ${usage.cache_read_input_tokens} tokens read from cache`);
            } else {
                console.log(' CACHE MISS: No tokens read from cache');
                console.log(' Note: Cache may take time to populate on first conversation');
            }

            // Turn 2 must produce text; input_tokens only has to be a
            // non-negative number (an undefined value fails the comparison).
            const passed = content.hasText && usage.input_tokens >= 0;
            results.push({ name: 'Turn 2: Has response and usage', passed });
            if (!passed) allPassed = false;

            // Informational only: always recorded as passed and never affects
            // allPassed, so a cache miss cannot fail the run.
            results.push({
                name: 'Turn 2: Cache read tokens reported',
                passed: true,
                info: cacheHit ? `${usage.cache_read_input_tokens} tokens` : 'No cache hit (may be first run)'
            });
        }
    }

    // ===== Summary =====
    console.log('\n' + banner);
    console.log(`SUMMARY [${familyLabel}]`);
    console.log(banner);
    for (const { name, passed, info } of results) {
        const status = passed ? 'PASS' : 'FAIL';
        const suffix = info ? ` (${info})` : '';
        console.log(` [${status}] ${name}${suffix}`);
    }
    console.log('\n' + banner);
    console.log(`[${familyLabel}] ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'}`);
    console.log(banner);
    console.log('\nNote: Cache effectiveness depends on:');
    console.log(' 1. Stable session ID (derived from first user message hash)');
    console.log(' 2. Sticky account selection (same account across turns)');
    console.log(' 3. API-side cache availability (may take time to populate)');

    return allPassed;
}

/**
 * Sends one streaming request of the caching scenario. Both turns share the
 * same model, system prompt and model config; only the messages differ.
 *
 * @param {string} model - Model identifier for the request.
 * @param {{max_tokens: *, thinking: *}} modelConfig - Value returned by getModelConfig().
 * @param {Array<object>} messages - Conversation to send.
 * @returns {*} Whatever streamRequest() returns (awaited by the caller).
 */
function sendCachingTurn(model, modelConfig, messages) {
    return streamRequest({
        model,
        max_tokens: modelConfig.max_tokens,
        stream: true,
        system: LARGE_SYSTEM_PROMPT,
        thinking: modelConfig.thinking,
        messages
    });
}

/**
 * Prints the content flags, the four usage counters and a preview (first 80
 * characters) of the first text block for a completed turn.
 *
 * @param {object} content - Result of analyzeContent().
 * @param {object} usage - Result of extractUsage().
 */
function logTurnDetails(content, usage) {
    console.log(' Content:');
    console.log(` Thinking: ${content.hasThinking ? 'YES' : 'NO'}`);
    console.log(` Text: ${content.hasText ? 'YES' : 'NO'}`);
    console.log(' Usage:');
    console.log(` input_tokens: ${usage.input_tokens}`);
    console.log(` output_tokens: ${usage.output_tokens}`);
    console.log(` cache_read_input_tokens: ${usage.cache_read_input_tokens}`);
    console.log(` cache_creation_input_tokens: ${usage.cache_creation_input_tokens}`);
    if (content.hasText && content.text[0].text) {
        console.log(` Response: "${content.text[0].text.substring(0, 80)}..."`);
    }
}
/**
 * Runs the caching scenario for every configured model family, one after the
 * other, prints the overall verdict and terminates the process.
 *
 * The process exits with code 0 only when every family passed, 1 otherwise.
 *
 * @returns {Promise<void>}
 */
async function runTests() {
    const banner = '='.repeat(60);
    const testModels = await getTestModels();

    // Families are run sequentially so their console output does not interleave.
    let everyFamilyPassed = true;
    for (const entry of testModels) {
        console.log('\n');
        const familyPassed = await runTestsForModel(entry.family, entry.model);
        if (!familyPassed) {
            everyFamilyPassed = false;
        }
    }

    const verdict = everyFamilyPassed ? 'ALL MODEL FAMILIES PASSED' : 'SOME MODEL FAMILIES FAILED';
    console.log(`\n${banner}`);
    console.log('FINAL RESULT');
    console.log(banner);
    console.log(`Overall: ${verdict}`);
    console.log(banner);

    const exitCode = everyFamilyPassed ? 0 : 1;
    process.exit(exitCode);
}
/**
 * Last-resort handler for the script entry point: logs the error that
 * rejected runTests() and exits with a failing status code.
 *
 * @param {*} err - Rejection reason from runTests().
 */
function handleFatalError(err) {
    console.error('Test failed with error:', err);
    process.exit(1);
}

// Script entry point.
runTests().catch(handleFatalError);