feat: add prompt caching, sticky account selection, and non-thinking model

- Implement sticky account selection for prompt cache continuity
- Derive stable session ID from first user message (SHA256 hash)
- Return cache_read_input_tokens in usage metadata
- Add claude-sonnet-4-5 model without thinking
- Remove DEFAULT_THINKING_BUDGET (let API use its default)
- Add prompt caching test
- Update README and CLAUDE.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2025-12-25 13:26:48 +05:30
parent 943a4dcb20
commit 01cda835d9
10 changed files with 464 additions and 80 deletions

View File

@@ -178,6 +178,42 @@ function analyzeEvents(events) {
};
}
/**
 * Extract usage metadata from SSE events.
 *
 * Input and cache token counts are taken from the first `message_start`
 * event. Output tokens are taken from the last `message_delta` event that
 * carries a `usage` object. Numeric cache counters found in that delta
 * override the values read from `message_start`; missing or non-numeric
 * values (e.g. `null`) leave the `message_start` values untouched.
 *
 * @param {Array} events - Array of SSE events (objects with `type` and `data`)
 * @returns {Object} - Usage info with input/output/cache tokens
 */
function extractUsage(events) {
    const usage = {
        input_tokens: 0,
        output_tokens: 0,
        cache_read_input_tokens: 0,
        cache_creation_input_tokens: 0
    };

    // Get input and cache usage from message_start
    const startUsage = events.find(e => e?.type === 'message_start')?.data?.message?.usage;
    if (startUsage) {
        usage.input_tokens = startUsage.input_tokens || 0;
        usage.cache_read_input_tokens = startUsage.cache_read_input_tokens || 0;
        usage.cache_creation_input_tokens = startUsage.cache_creation_input_tokens || 0;
    }

    // Use the last message_delta that reports usage, so that when a stream
    // contains several deltas the most recent counts are the ones returned.
    let deltaUsage;
    for (const event of events) {
        if (event?.type === 'message_delta' && event.data?.usage) {
            deltaUsage = event.data.usage;
        }
    }

    if (deltaUsage) {
        usage.output_tokens = deltaUsage.output_tokens || 0;

        // Cache counters may be repeated/updated in the delta. Only accept real
        // numbers so a null placeholder cannot clobber the message_start value.
        for (const key of ['cache_read_input_tokens', 'cache_creation_input_tokens']) {
            if (typeof deltaUsage[key] === 'number') {
                usage[key] = deltaUsage[key];
            }
        }
    }

    return usage;
}
// Common tool definitions for tests
const commonTools = {
getWeather: {
@@ -256,5 +292,6 @@ module.exports = {
makeRequest,
analyzeContent,
analyzeEvents,
extractUsage,
commonTools
};