diff --git a/CLAUDE.md b/CLAUDE.md index 96eb2d9..4141085 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -55,6 +55,7 @@ npm run test:images # Image processing npm run test:caching # Prompt caching npm run test:crossmodel # Cross-model thinking signatures npm run test:oauth # OAuth no-browser mode +npm run test:cache-control # Cache control field stripping # Run strategy unit tests (no server required) node tests/test-strategies.cjs @@ -102,7 +103,8 @@ src/ │ └── trackers/ # State trackers for hybrid strategy │ ├── index.js # Re-exports trackers │ ├── health-tracker.js # Account health scores -│ └── token-bucket-tracker.js # Client-side rate limiting +│ ├── token-bucket-tracker.js # Client-side rate limiting +│ └── quota-tracker.js # Quota-aware account selection │ ├── auth/ # Authentication │ ├── oauth.js # Google OAuth with PKCE @@ -211,11 +213,15 @@ public/ - Maximizes concurrent request distribution 3. **Hybrid Strategy** (default, smart distribution): - - Uses health scores, token buckets, and LRU for selection - - Scoring formula: `score = (Health × 2) + ((Tokens / MaxTokens × 100) × 5) + (LRU × 0.1)` + - Uses health scores, token buckets, quota awareness, and LRU for selection + - Scoring formula: `score = (Health × 2) + ((Tokens / MaxTokens × 100) × 5) + (Quota × 1) + (LRU × 0.1)` - Health scores: Track success/failure patterns with passive recovery - Token buckets: Client-side rate limiting (50 tokens, 6 per minute regeneration) + - Quota awareness: Accounts with critical quota (<5%) are deprioritized - LRU freshness: Prefer accounts that have rested longer + - **Emergency/Last Resort Fallback**: When all accounts are exhausted: + - Emergency fallback: Bypasses health check, adds 250ms throttle delay + - Last resort fallback: Bypasses both health and token checks, adds 500ms throttle delay - Configuration in `src/config.js` under `accountSelection` **Account Data Model:** @@ -251,6 +257,14 @@ Each account object in `accounts.json` contains: - For Gemini targets: 
strict validation - drops unknown or mismatched signatures - For Claude targets: lenient - lets Claude validate its own signatures +**Cache Control Handling (Issue #189):** +- Claude Code CLI sends `cache_control` fields on content blocks for prompt caching +- Cloud Code API rejects these with "Extra inputs are not permitted" +- `cleanCacheControl(messages)` strips cache_control from ALL block types at pipeline entry +- Called at the START of `convertAnthropicToGoogle()` before any other processing +- Additional sanitizers (`sanitizeTextBlock`, `sanitizeToolUseBlock`) provide defense-in-depth +- Pattern inspired by Antigravity-Manager's `clean_cache_control_from_messages()` + **Native Module Auto-Rebuild:** - When Node.js is updated, native modules like `better-sqlite3` may become incompatible - The proxy automatically detects `NODE_MODULE_VERSION` mismatch errors @@ -284,7 +298,9 @@ Each account object in `accounts.json` contains: - ARIA labels on search inputs and icon buttons - Keyboard navigation support (Escape to clear search) - **Security**: Optional password protection via `WEBUI_PASSWORD` env var +- **Config Redaction**: Sensitive values (passwords, tokens) are redacted in API responses - **Smart Refresh**: Client-side polling with ±20% jitter and tab visibility detection (3x slower when hidden) +- **i18n Support**: English, Chinese (中文), Indonesian (Bahasa), Portuguese (PT-BR) ## Testing Notes diff --git a/README.md b/README.md index c1e36cd..5658dff 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,8 @@ Choose one of the following methods to authorize the proxy: 2. Navigate to the **Accounts** tab and click **Add Account**. 3. Complete the Google OAuth authorization in the popup window. +> **Headless/Remote Servers**: If running on a server without a browser, the WebUI supports a "Manual Authorization" mode. After clicking "Add Account", you can copy the OAuth URL, complete authorization on your local machine, and paste the authorization code back. 
+ #### **Method B: CLI (Desktop or Headless)** If you prefer the terminal or are on a remote server: @@ -280,7 +282,7 @@ Choose a strategy based on your needs: | Strategy | Best For | Description | | --- | --- | --- | -| **Hybrid** (Default) | Most users | Smart selection combining health score, token bucket rate limiting, and LRU freshness | +| **Hybrid** (Default) | Most users | Smart selection combining health score, token bucket rate limiting, quota awareness, and LRU freshness | | **Sticky** | Prompt caching | Stays on the same account to maximize cache hits, switches only when rate-limited | | **Round-Robin** | Even distribution | Cycles through accounts sequentially for balanced load | @@ -298,6 +300,8 @@ antigravity-claude-proxy start --strategy=round-robin # Load-balanced - **Health Score Tracking**: Accounts earn points for successful requests and lose points for failures/rate-limits - **Token Bucket Rate Limiting**: Client-side throttling with regenerating tokens (50 max, 6/minute) +- **Quota Awareness**: Accounts with critical quota (<5%) are deprioritized; exhausted accounts trigger emergency fallback +- **Emergency Fallback**: When all accounts appear exhausted, bypasses checks with throttle delays (250-500ms) - **Automatic Cooldown**: Rate-limited accounts recover automatically after reset time expires - **Invalid Account Detection**: Accounts needing re-authentication are marked and skipped - **Prompt Caching Support**: Session IDs derived from conversation enable cache hits across turns @@ -340,13 +344,14 @@ The proxy includes a built-in, modern web interface for real-time monitoring and - **Real-time Dashboard**: Monitor request volume, active accounts, model health, and subscription tier distribution. - **Visual Model Quota**: Track per-model usage and next reset times with color-coded progress indicators. - **Account Management**: Add/remove Google accounts via OAuth, view subscription tiers (Free/Pro/Ultra) and quota status at a glance. 
+- **Manual OAuth Mode**: Add accounts on headless servers by copying the OAuth URL and pasting the authorization code. - **Claude CLI Configuration**: Edit your `~/.claude/settings.json` directly from the browser. - **Persistent History**: Tracks request volume by model family for 30 days, persisting across server restarts. - **Time Range Filtering**: Analyze usage trends over 1H, 6H, 24H, 7D, or All Time periods. - **Smart Analysis**: Auto-select top 5 most used models or toggle between Family/Model views. - **Live Logs**: Stream server logs with level-based filtering and search. - **Advanced Tuning**: Configure retries, timeouts, and debug mode on the fly. -- **Bilingual Interface**: Full support for English and Chinese (switch via Settings). +- **Multi-language Interface**: Full support for English, Chinese (中文), Indonesian (Bahasa), and Portuguese (PT-BR). --- @@ -360,9 +365,11 @@ While most users can use the default settings, you can tune the proxy behavior v - **WebUI Password**: Secure your dashboard with `WEBUI_PASSWORD` env var or in config. - **Custom Port**: Change the default `8080` port. - **Retry Logic**: Configure `maxRetries`, `retryBaseMs`, and `retryMaxMs`. +- **Rate Limit Handling**: Comprehensive rate limit detection from headers and error messages with intelligent retry-after parsing. - **Load Balancing**: Adjust `defaultCooldownMs` and `maxWaitBeforeErrorMs`. - **Persistence**: Enable `persistTokenCache` to save OAuth sessions across restarts. - **Max Accounts**: Set `maxAccounts` (1-100) to limit the number of Google accounts. Default: 10. +- **Endpoint Fallback**: Automatic 403/404 endpoint fallback for API compatibility. Refer to `config.example.json` for a complete list of fields and documentation. 
@@ -421,6 +428,7 @@ npm run test:interleaved # Interleaved thinking npm run test:images # Image processing npm run test:caching # Prompt caching npm run test:strategies # Account selection strategies +npm run test:cache-control # Cache control field stripping ``` --- diff --git a/package.json b/package.json index d32b26a..ad4e3b9 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,8 @@ "test:oauth": "node tests/test-oauth-no-browser.cjs", "test:emptyretry": "node tests/test-empty-response-retry.cjs", "test:sanitizer": "node tests/test-schema-sanitizer.cjs", - "test:strategies": "node tests/test-strategies.cjs" + "test:strategies": "node tests/test-strategies.cjs", + "test:cache-control": "node tests/test-cache-control.cjs" }, "keywords": [ "claude", diff --git a/src/format/request-converter.js b/src/format/request-converter.js index 9ec1416..60dd05e 100644 --- a/src/format/request-converter.js +++ b/src/format/request-converter.js @@ -18,7 +18,8 @@ import { hasGeminiHistory, hasUnsignedThinkingBlocks, needsThinkingRecovery, - closeToolLoopForThinking + closeToolLoopForThinking, + cleanCacheControl } from './thinking-utils.js'; import { logger } from '../utils/logger.js'; @@ -32,7 +33,13 @@ import { logger } from '../utils/logger.js'; * @returns {Object} Request body for Cloud Code API */ export function convertAnthropicToGoogle(anthropicRequest) { - const { messages, system, max_tokens, temperature, top_p, top_k, stop_sequences, tools, tool_choice, thinking } = anthropicRequest; + // [CRITICAL FIX] Pre-clean all cache_control fields from messages (Issue #189) + // Claude Code CLI sends cache_control on various content blocks, but Cloud Code API + // rejects them with "Extra inputs are not permitted". Clean them proactively here + // before any other processing, following the pattern from Antigravity-Manager. 
+ const messages = cleanCacheControl(anthropicRequest.messages || []); + + const { system, max_tokens, temperature, top_p, top_k, stop_sequences, tools, tool_choice, thinking } = anthropicRequest; const modelName = anthropicRequest.model || ''; const modelFamily = getModelFamily(modelName); const isClaudeModel = modelFamily === 'claude'; diff --git a/src/format/thinking-utils.js b/src/format/thinking-utils.js index 0562b36..e144e79 100644 --- a/src/format/thinking-utils.js +++ b/src/format/thinking-utils.js @@ -7,6 +7,62 @@ import { MIN_SIGNATURE_LENGTH } from '../constants.js'; import { getCachedSignatureFamily } from './signature-cache.js'; import { logger } from '../utils/logger.js'; +// ============================================================================ +// Cache Control Cleaning (Issue #189) +// ============================================================================ + +/** + * Remove cache_control fields from all content blocks in messages. + * This is a critical fix for Issue #189 where Claude Code CLI sends cache_control + * fields that the Cloud Code API rejects with "Extra inputs are not permitted". + * + * Inspired by Antigravity-Manager's clean_cache_control_from_messages() approach, + * this function proactively strips cache_control from ALL block types at the + * entry point of the conversion pipeline. 
/**
 * Remove cache_control fields from all content blocks in messages.
 *
 * Claude Code CLI attaches `cache_control` to content blocks for prompt
 * caching, but the Cloud Code API rejects the field with
 * "Extra inputs are not permitted" (Issue #189). This runs at the entry
 * point of the conversion pipeline so later stages never see the field.
 *
 * Blocks may themselves carry a nested `content` array (for example the
 * text/image parts of a `tool_result`); those nested blocks are cleaned
 * recursively as well, so the field is removed at every depth.
 *
 * The input is never mutated: blocks that need no change are returned by
 * identity, blocks that do are shallow-copied without the field.
 *
 * @param {Array} messages - Array of messages in Anthropic format
 * @returns {Array} Messages with cache_control fields removed. Non-array
 *   input is returned unchanged.
 */
export function cleanCacheControl(messages) {
    if (!Array.isArray(messages)) return messages;

    let removedCount = 0;

    // Strip cache_control from one block and from any blocks nested in its
    // `content` array. Returns the original object when nothing changed.
    const stripBlock = (block) => {
        if (!block || typeof block !== 'object') return block;

        let cleanBlock = block;
        if (block.cache_control !== undefined) {
            // Shallow copy without cache_control
            const { cache_control, ...rest } = block;
            cleanBlock = rest;
            removedCount++;
        }

        if (Array.isArray(cleanBlock.content)) {
            const original = cleanBlock.content;
            const nested = original.map(stripBlock);
            // Only copy the parent when a nested block actually changed
            if (nested.some((item, index) => item !== original[index])) {
                cleanBlock = { ...cleanBlock, content: nested };
            }
        }

        return cleanBlock;
    };

    const cleaned = messages.map((message) => {
        if (!message || typeof message !== 'object') return message;

        // String content cannot carry cache_control; anything that is not
        // an array of blocks is passed through untouched.
        if (!Array.isArray(message.content)) return message;

        return {
            ...message,
            content: message.content.map(stripBlock)
        };
    });

    if (removedCount > 0) {
        logger.debug(`[ThinkingUtils] Removed cache_control from ${removedCount} block(s)`);
    }

    return cleaned;
}

/**
 * Sanitize a text block by removing extra fields like cache_control.
 * Only keeps: type, text
 *
 * @param {Object} block - Text block to sanitize
 * @returns {Object} Sanitized text block; anything that is not a text
 *   block is returned unchanged
 */
function sanitizeTextBlock(block) {
    if (!block || block.type !== 'text') return block;

    const sanitized = { type: 'text' };
    if (block.text !== undefined) sanitized.text = block.text;
    return sanitized;
}
/**
 * Reduce a tool_use block to the fields that are allowed downstream,
 * discarding extras such as cache_control.
 *
 * Retained fields, in this order: type, id, name, input, thoughtSignature
 * (the latter is preserved for Gemini models). A field is copied only when
 * it is not `undefined` on the source block.
 *
 * @param {Object} block - Tool_use block to sanitize
 * @returns {Object} A new object with only the retained fields, or the
 *   input itself when it is falsy or not a tool_use block
 */
function sanitizeToolUseBlock(block) {
    const isToolUse = Boolean(block) && block.type === 'tool_use';
    if (!isToolUse) {
        return block;
    }

    const kept = { type: 'tool_use' };
    for (const field of ['id', 'name', 'input', 'thoughtSignature']) {
        const value = block[field];
        if (value !== undefined) {
            kept[field] = value;
        }
    }
    return kept;
}
family filter - 'filters.search', // Search input - 'computeQuotaRows' // Filter action + 'showTimeRangeDropdown', // Time range dropdown toggle + 'showDisplayModeDropdown', // Display mode dropdown toggle + 'showModelFilter', // Model/family filter dropdown toggle + 'setTimeRange' // Time range action ]; const missing = filterElements.filter(el => !html.includes(el)); @@ -106,23 +107,24 @@ const tests = [ } }, { - name: 'Dashboard table has required columns', + name: 'Dashboard has chart and visualization elements', async run() { const res = await request('/views/dashboard.html'); const html = res.data; - const columns = [ - 'modelIdentity', // Model name column - 'globalQuota', // Quota column - 'nextReset', // Reset time column - 'distribution' // Account distribution column + // Dashboard now uses charts instead of tables + const visualElements = [ + 'quotaChart', // Quota distribution pie chart + 'usageTrendChart', // Usage trend line chart + 'usageStats.total', // Total usage stat + 'selectedFamilies' // Family selection for chart ]; - const missing = columns.filter(col => !html.includes(col)); + const missing = visualElements.filter(col => !html.includes(col)); if (missing.length > 0) { - throw new Error(`Missing table columns: ${missing.join(', ')}`); + throw new Error(`Missing visualization elements: ${missing.join(', ')}`); } - return 'All table columns present'; + return 'All chart and visualization elements present'; } } ]; diff --git a/tests/run-all.cjs b/tests/run-all.cjs index b8bec8f..a3e428b 100644 --- a/tests/run-all.cjs +++ b/tests/run-all.cjs @@ -10,6 +10,7 @@ const path = require('path'); const tests = [ { name: 'Account Selection Strategies', file: 'test-strategies.cjs' }, + { name: 'Cache Control Stripping', file: 'test-cache-control.cjs' }, { name: 'Thinking Signatures', file: 'test-thinking-signatures.cjs' }, { name: 'Multi-turn Tools (Non-Streaming)', file: 'test-multiturn-thinking-tools.cjs' }, { name: 'Multi-turn Tools (Streaming)', 
/**
 * Cache Control Field Test (Issue #189)
 *
 * Tests that cache_control fields on content blocks are properly stripped
 * before being sent to the Cloud Code API.
 *
 * Claude Code CLI sends cache_control on text, thinking, tool_use, tool_result,
 * image, and document blocks for prompt caching optimization. The Cloud Code API
 * rejects these with "Extra inputs are not permitted".
 *
 * This test verifies that:
 * 1. Text blocks with cache_control work correctly
 * 2. Multi-turn conversations with cache_control on assistant content work
 * 3. Tool_result blocks with cache_control work correctly
 *
 * Runs for both Claude and Gemini model families.
 */
const { streamRequest, analyzeContent, commonTools } = require('./helpers/http-client.cjs');
const { getTestModels, getModelConfig } = require('./helpers/test-models.cjs');

const tools = [commonTools.getWeather];

// Sentinel returned by a test case that could not run its real assertion.
const SKIPPED = Symbol('skipped');

/**
 * Find the first error event in a streamed response.
 *
 * @param {Array} events - Events collected by streamRequest
 * @returns {{hasError: boolean, message: (string|null|undefined)}} Whether an
 *   error event exists and its message (undefined if the event has none)
 */
function findError(events) {
    const errorEvent = events.find(e => e.type === 'error');
    if (!errorEvent) return { hasError: false, message: null };
    return { hasError: true, message: errorEvent.data?.error?.message };
}

/**
 * Print the standard diagnostics for a response and decide pass/fail.
 *
 * @param {Object} response - Result of streamRequest ({ events, content })
 * @param {string} receivedLabel - Label for the "response received" line
 * @param {boolean} [flagCacheControl=false] - Also print a hint when the
 *   error message mentions cache_control
 * @returns {boolean} true when there is no error event and content is non-empty
 */
function reportResponse(response, receivedLabel, flagCacheControl = false) {
    const { hasError, message } = findError(response.events);
    const hasContent = response.content.length > 0;

    console.log(` ${receivedLabel}: ${hasContent ? 'YES' : 'NO'}`);
    console.log(` Has error: ${hasError ? 'YES' : 'NO'}`);
    if (hasError) {
        console.log(` Error message: ${message}`);
        // Check specifically for cache_control error
        if (flagCacheControl && message && message.includes('cache_control')) {
            console.log(' >>> cache_control field NOT stripped properly! <<<');
        }
    }

    const analyzed = analyzeContent(response.content);
    if (analyzed.hasText) {
        console.log(` Response preview: "${analyzed.text[0].text.substring(0, 50)}..."`);
    }

    return !hasError && hasContent;
}

/**
 * Run one test case, record its outcome, and never throw.
 *
 * @param {Array} results - Accumulator of { name, passed, skipped? } records
 * @param {string} name - Name used in the summary
 * @param {Function} body - Async function returning true/false, or SKIPPED
 * @returns {Promise<boolean>} false only when the case failed or threw;
 *   skipped cases do not fail the run
 */
async function runCase(results, name, body) {
    try {
        const outcome = await body();
        if (outcome === SKIPPED) {
            // Skips are recorded uniformly and excluded from the pass count
            results.push({ name, passed: true, skipped: true });
            return true;
        }
        results.push({ name, passed: outcome });
        console.log(` Result: ${outcome ? 'PASS' : 'FAIL'}`);
        return outcome;
    } catch (err) {
        console.log(` ERROR: ${err.message}`);
        results.push({ name, passed: false });
        return false;
    }
}

/**
 * Run the three cache_control scenarios against one model.
 *
 * @param {string} family - Model family name (used for labels and config)
 * @param {string} model - Model identifier sent in each request
 * @returns {Promise<boolean>} true when no scenario failed
 */
async function runTestsForModel(family, model) {
    console.log('='.repeat(60));
    console.log(`CACHE CONTROL TEST [${family.toUpperCase()}]`);
    console.log(`Model: ${model}`);
    console.log('Tests that cache_control fields are stripped from all block types');
    console.log('='.repeat(60));
    console.log('');

    let allPassed = true;
    const results = [];
    const modelConfig = getModelConfig(family);

    // Fields shared by every request in this run
    const baseRequest = {
        model,
        max_tokens: modelConfig.max_tokens,
        stream: true,
        thinking: modelConfig.thinking
    };

    // ===== TEST 1: User text block with cache_control =====
    console.log('TEST 1: User text block with cache_control');
    console.log('-'.repeat(40));

    const test1Ok = await runCase(results, 'User text block with cache_control', async () => {
        const response = await streamRequest({
            ...baseRequest,
            messages: [
                {
                    role: 'user',
                    content: [
                        {
                            type: 'text',
                            text: 'What is the capital of France? Reply in one word.',
                            cache_control: { type: 'ephemeral' }
                        }
                    ]
                }
            ]
        });
        return reportResponse(response, 'Response received');
    });
    if (!test1Ok) allPassed = false;

    // ===== TEST 2: Multi-turn with cache_control on assistant content =====
    console.log('\nTEST 2: Multi-turn with cache_control on assistant content');
    console.log('-'.repeat(40));

    const test2Ok = await runCase(results, 'Multi-turn with cache_control', async () => {
        // First turn - get a response
        const turn1 = await streamRequest({
            ...baseRequest,
            messages: [{ role: 'user', content: 'Say hello.' }]
        });

        if (turn1.content.length === 0) {
            console.log(' SKIPPED - Turn 1 returned empty response');
            return SKIPPED;
        }

        // Add cache_control to ALL blocks in assistant response (simulating Claude Code)
        const modifiedContent = turn1.content.map(block => ({
            ...block,
            cache_control: { type: 'ephemeral' }
        }));

        // Second turn - use modified content with cache_control
        const turn2 = await streamRequest({
            ...baseRequest,
            messages: [
                { role: 'user', content: 'Say hello.' },
                { role: 'assistant', content: modifiedContent },
                {
                    role: 'user',
                    content: [
                        {
                            type: 'text',
                            text: 'Now say goodbye.',
                            cache_control: { type: 'ephemeral' }
                        }
                    ]
                }
            ]
        });

        // Validate the event list before printing anything for this turn
        findError(turn2.events);
        console.log(` Turn 1 blocks: ${turn1.content.length}`);
        return reportResponse(turn2, 'Turn 2 response received', true);
    });
    if (!test2Ok) allPassed = false;

    // ===== TEST 3: Tool loop with cache_control on tool_result =====
    console.log('\nTEST 3: Tool loop with cache_control on tool_result');
    console.log('-'.repeat(40));

    const test3Ok = await runCase(results, 'Tool_result with cache_control', async () => {
        const question = 'What is the weather in Tokyo? Use the get_weather tool.';

        // First turn - request tool use
        const toolTurn1 = await streamRequest({
            ...baseRequest,
            tools,
            messages: [{ role: 'user', content: question }]
        });

        const firstTurn = analyzeContent(toolTurn1.content);
        if (!firstTurn.hasToolUse) {
            console.log(' SKIPPED - Model did not use tool in turn 1');
            return SKIPPED;
        }

        const toolUseId = firstTurn.toolUse[0].id;
        console.log(` Tool use ID: ${toolUseId}`);

        // Second turn - provide tool result with cache_control
        const toolTurn2 = await streamRequest({
            ...baseRequest,
            tools,
            messages: [
                { role: 'user', content: question },
                { role: 'assistant', content: toolTurn1.content },
                {
                    role: 'user',
                    content: [
                        {
                            type: 'tool_result',
                            tool_use_id: toolUseId,
                            content: 'The weather in Tokyo is 22°C and partly cloudy.',
                            cache_control: { type: 'ephemeral' }
                        }
                    ]
                }
            ]
        });

        return reportResponse(toolTurn2, 'Turn 2 response received', true);
    });
    if (!test3Ok) allPassed = false;

    // ===== Summary =====
    console.log('\n' + '='.repeat(60));
    console.log(`SUMMARY [${family.toUpperCase()}]`);
    console.log('='.repeat(60));

    for (const result of results) {
        const status = result.skipped ? 'SKIP' : (result.passed ? 'PASS' : 'FAIL');
        console.log(` [${status}] ${result.name}`);
    }

    const passedCount = results.filter(r => r.passed && !r.skipped).length;
    const skippedCount = results.filter(r => r.skipped).length;
    const totalTests = results.length - skippedCount;

    console.log('\n' + '='.repeat(60));
    console.log(`[${family.toUpperCase()}] ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'} (${passedCount}/${totalTests})`);
    console.log('='.repeat(60));

    return allPassed;
}

/**
 * Entry point: run the scenarios for every configured model family and
 * exit with status 0 when all families passed, 1 otherwise.
 */
async function runTests() {
    console.log('');
    console.log('='.repeat(60));
    console.log('CACHE CONTROL FIELD STRIPPING TEST (Issue #189)');
    console.log('='.repeat(60));
    console.log('');
    console.log('This test verifies that cache_control fields are properly');
    console.log('stripped from all content blocks before sending to Cloud Code API.');
    console.log('');

    const models = await getTestModels();
    let allPassed = true;

    for (const { family, model } of models) {
        console.log('\n');
        const passed = await runTestsForModel(family, model);
        if (!passed) allPassed = false;
    }

    console.log('\n' + '='.repeat(60));
    console.log('FINAL RESULT');
    console.log('='.repeat(60));
    console.log(`Overall: ${allPassed ? 'ALL MODEL FAMILIES PASSED' : 'SOME MODEL FAMILIES FAILED'}`);
    console.log('='.repeat(60));

    process.exit(allPassed ? 0 : 1);
}

runTests().catch(err => {
    console.error('Test failed with error:', err);
    process.exit(1);
});
new HybridStrategy({ healthScore: { initial: 40, minUsable: 50 }, tokenBucket: { initialTokens: 50, maxTokens: 50 } @@ -699,19 +705,25 @@ async function runTests() { const accounts = createMockAccounts(3); // All accounts start with health 40, which is below minUsable 50 + // But emergency fallback should still return an account const result = strategy.selectAccount(accounts, 'model'); - assertNull(result.account, 'Should filter all accounts with low health'); + assertNotNull(result.account, 'Emergency fallback should return an account'); + // waitMs indicates fallback was used (250ms for emergency) + assertTrue(result.waitMs >= 250, 'Emergency fallback should add throttle delay'); }); - test('HybridStrategy: filters out accounts without tokens', () => { + test('HybridStrategy: uses last resort fallback for accounts without tokens', () => { const strategy = new HybridStrategy({ healthScore: { initial: 70 }, tokenBucket: { initialTokens: 0, maxTokens: 50 } }); const accounts = createMockAccounts(3); + // No tokens, but last resort fallback should still return an account const result = strategy.selectAccount(accounts, 'model'); - assertNull(result.account, 'Should filter all accounts without tokens'); + assertNotNull(result.account, 'Last resort fallback should return an account'); + // waitMs indicates fallback was used (500ms for lastResort) + assertTrue(result.waitMs >= 500, 'Last resort fallback should add throttle delay'); }); test('HybridStrategy: consumes token on selection', () => { @@ -993,7 +1005,7 @@ async function runTests() { assertEqual(result.account.email, 'account2@example.com'); }); - test('Integration: Token consumption limits requests', () => { + test('Integration: Token exhaustion triggers last resort fallback', () => { const strategy = new HybridStrategy({ tokenBucket: { initialTokens: 2, maxTokens: 10 } }); @@ -1003,9 +1015,11 @@ async function runTests() { strategy.selectAccount(accounts, 'model'); // 2 -> 1 strategy.selectAccount(accounts, 
'model'); // 1 -> 0 - // Third request should fail (no tokens) + // Third request should use last resort fallback (not null) const result = strategy.selectAccount(accounts, 'model'); - assertNull(result.account, 'Should return null when tokens exhausted'); + assertNotNull(result.account, 'Last resort fallback should return an account'); + // waitMs indicates fallback was used (500ms for lastResort) + assertTrue(result.waitMs >= 500, 'Last resort fallback should add throttle delay'); }); test('Integration: Multi-model rate limiting is independent', () => {