feat: add prompt caching, sticky account selection, and non-thinking model

- Implement sticky account selection for prompt cache continuity
- Derive stable session ID from first user message (SHA256 hash)
- Return cache_read_input_tokens in usage metadata
- Add claude-sonnet-4-5 model without thinking
- Remove DEFAULT_THINKING_BUDGET (let API use its default)
- Add prompt caching test
- Update README and CLAUDE.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-25 13:26:48 +05:30
parent 943a4dcb20
commit 01cda835d9
10 changed files with 464 additions and 80 deletions
--- a/src/constants.js
+++ b/src/constants.js
@@ -93,7 +93,6 @@ export const MAX_ACCOUNTS = 10; // Maximum number of accounts allowed
 export const MAX_WAIT_BEFORE_ERROR_MS = 120000; // 2 minutes - throw error if wait exceeds this

 // Thinking model constants
-export const DEFAULT_THINKING_BUDGET = 16000; // Default thinking budget tokens
 export const CLAUDE_THINKING_MAX_OUTPUT_TOKENS = 64000; // Max output tokens for thinking models
 export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length

@@ -131,7 +130,6 @@ export default {
    MAX_RETRIES,
    MAX_ACCOUNTS,
    MAX_WAIT_BEFORE_ERROR_MS,
-    DEFAULT_THINKING_BUDGET,
    CLAUDE_THINKING_MAX_OUTPUT_TOKENS,
    MIN_SIGNATURE_LENGTH,
    OAUTH_CONFIG,