feat: add prompt caching, sticky account selection, and non-thinking model

- Implement sticky account selection for prompt cache continuity
- Derive a stable session ID from the first user message (SHA-256 hash)
- Return cache_read_input_tokens in usage metadata
- Add non-thinking claude-sonnet-4-5 model
- Remove DEFAULT_THINKING_BUDGET (let API use its default)
- Add prompt caching test
- Update README and CLAUDE.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2025-12-25 13:26:48 +05:30
parent 943a4dcb20
commit 01cda835d9
10 changed files with 464 additions and 80 deletions

View File

@@ -10,7 +10,6 @@
import crypto from 'crypto';
import {
MODEL_MAPPINGS,
DEFAULT_THINKING_BUDGET,
CLAUDE_THINKING_MAX_OUTPUT_TOKENS,
MIN_SIGNATURE_LENGTH
} from './constants.js';
@@ -502,21 +501,27 @@ export function convertAnthropicToGoogle(anthropicRequest) {
// Enable thinking for Claude thinking models
if (isClaudeThinkingModel) {
// Get budget from request or use default
const thinkingBudget = thinking?.budget_tokens || DEFAULT_THINKING_BUDGET;
googleRequest.generationConfig.thinkingConfig = {
include_thoughts: true,
thinking_budget: thinkingBudget
const thinkingConfig = {
include_thoughts: true
};
// Ensure maxOutputTokens is large enough for thinking models
if (!googleRequest.generationConfig.maxOutputTokens ||
googleRequest.generationConfig.maxOutputTokens <= thinkingBudget) {
googleRequest.generationConfig.maxOutputTokens = CLAUDE_THINKING_MAX_OUTPUT_TOKENS;
// Only set thinking_budget if explicitly provided
const thinkingBudget = thinking?.budget_tokens;
if (thinkingBudget) {
thinkingConfig.thinking_budget = thinkingBudget;
// Ensure maxOutputTokens is large enough when budget is specified
if (!googleRequest.generationConfig.maxOutputTokens ||
googleRequest.generationConfig.maxOutputTokens <= thinkingBudget) {
googleRequest.generationConfig.maxOutputTokens = CLAUDE_THINKING_MAX_OUTPUT_TOKENS;
}
console.log('[FormatConverter] Thinking enabled with budget:', thinkingBudget);
} else {
console.log('[FormatConverter] Thinking enabled (no budget specified)');
}
console.log('[FormatConverter] Thinking enabled with budget:', thinkingBudget);
googleRequest.generationConfig.thinkingConfig = thinkingConfig;
}
// Convert tools to Google format
@@ -696,7 +701,11 @@ export function convertGoogleToAnthropic(googleResponse, model) {
}
// Extract usage metadata
// Note: Antigravity's promptTokenCount is the TOTAL (includes cached),
// but Anthropic's input_tokens excludes cached. We subtract to match.
const usageMetadata = response.usageMetadata || {};
const promptTokens = usageMetadata.promptTokenCount || 0;
const cachedTokens = usageMetadata.cachedContentTokenCount || 0;
return {
id: `msg_${crypto.randomBytes(16).toString('hex')}`,
@@ -707,8 +716,10 @@ export function convertGoogleToAnthropic(googleResponse, model) {
stop_reason: stopReason,
stop_sequence: null,
usage: {
input_tokens: usageMetadata.promptTokenCount || 0,
output_tokens: usageMetadata.candidatesTokenCount || 0
input_tokens: promptTokens - cachedTokens,
output_tokens: usageMetadata.candidatesTokenCount || 0,
cache_read_input_tokens: cachedTokens,
cache_creation_input_tokens: 0
}
};
}