fix: strip cache_control fields from content blocks (#189)

Claude Code CLI sends cache_control on text, thinking, tool_use, and tool_result blocks for prompt caching. Cloud Code API rejects these with "Extra inputs are not permitted".

- Add cleanCacheControl() to proactively strip cache_control at pipeline entry
- Add sanitizeTextBlock() and sanitizeToolUseBlock() for defense-in-depth
- Update reorderAssistantContent() to use block sanitizers
- Add test-cache-control.cjs with multi-model test coverage
- Update frontend dashboard tests to match current UI design
- Update strategy tests to match v2.4.0 fallback behavior
- Update CLAUDE.md and README.md with recent features

Inspired by Antigravity-Manager's clean_cache_control_from_messages() pattern.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-25 03:27:05 +05:30
parent 6cadaee928
commit 683ca41480
9 changed files with 466 additions and 30 deletions
--- a/tests/frontend/test-frontend-dashboard.cjs
+++ b/tests/frontend/test-frontend-dashboard.cjs
@@ -91,11 +91,12 @@ const tests = [
            const res = await request('/views/dashboard.html');
            const html = res.data;

+            // Dashboard uses dropdown-based filters for time range, display mode, and model selection
            const filterElements = [
-                'filters.account',    // Account filter
-                'filters.family',     // Model family filter
-                'filters.search',     // Search input
-                'computeQuotaRows'    // Filter action
+                'showTimeRangeDropdown',  // Time range dropdown toggle
+                'showDisplayModeDropdown', // Display mode dropdown toggle
+                'showModelFilter',         // Model/family filter dropdown toggle
+                'setTimeRange'             // Time range action
            ];

            const missing = filterElements.filter(el => !html.includes(el));
@@ -106,23 +107,24 @@ const tests = [
        }
    },
    {
-        name: 'Dashboard table has required columns',
+        name: 'Dashboard has chart and visualization elements',
        async run() {
            const res = await request('/views/dashboard.html');
            const html = res.data;

-            const columns = [
-                'modelIdentity',      // Model name column
-                'globalQuota',        // Quota column
-                'nextReset',          // Reset time column
-                'distribution'        // Account distribution column
+            // Dashboard now uses charts instead of tables
+            const visualElements = [
+                'quotaChart',           // Quota distribution pie chart
+                'usageTrendChart',      // Usage trend line chart
+                'usageStats.total',     // Total usage stat
+                'selectedFamilies'      // Family selection for chart
            ];

-            const missing = columns.filter(col => !html.includes(col));
+            const missing = visualElements.filter(col => !html.includes(col));
            if (missing.length > 0) {
-                throw new Error(`Missing table columns: ${missing.join(', ')}`);
+                throw new Error(`Missing visualization elements: ${missing.join(', ')}`);
            }
-            return 'All table columns present';
+            return 'All chart and visualization elements present';
        }
    }
 ];
--- a/tests/run-all.cjs
+++ b/tests/run-all.cjs
@@ -10,6 +10,7 @@ const path = require('path');

 const tests = [
    { name: 'Account Selection Strategies', file: 'test-strategies.cjs' },
+    { name: 'Cache Control Stripping', file: 'test-cache-control.cjs' },
    { name: 'Thinking Signatures', file: 'test-thinking-signatures.cjs' },
    { name: 'Multi-turn Tools (Non-Streaming)', file: 'test-multiturn-thinking-tools.cjs' },
    { name: 'Multi-turn Tools (Streaming)', file: 'test-multiturn-thinking-tools-streaming.cjs' },
--- a/tests/test-cache-control.cjs
+++ b/tests/test-cache-control.cjs
@@ -0,0 +1,297 @@
+/**
+ * Cache Control Field Test (Issue #189)
+ *
+ * Tests that cache_control fields on content blocks are properly stripped
+ * before being sent to the Cloud Code API.
+ *
+ * Claude Code CLI sends cache_control on text, thinking, tool_use, tool_result,
+ * image, and document blocks for prompt caching optimization. The Cloud Code API
+ * rejects these with "Extra inputs are not permitted".
+ *
+ * This test verifies that:
+ * 1. Text blocks with cache_control work correctly
+ * 2. Multi-turn conversations with cache_control on assistant content work
+ * 3. Tool_result blocks with cache_control work correctly
+ *
+ * Runs for both Claude and Gemini model families.
+ */
+const { streamRequest, analyzeContent, commonTools } = require('./helpers/http-client.cjs');
+const { getTestModels, getModelConfig } = require('./helpers/test-models.cjs');
+
+const tools = [commonTools.getWeather];
+
+async function runTestsForModel(family, model) {
+    console.log('='.repeat(60));
+    console.log(`CACHE CONTROL TEST [${family.toUpperCase()}]`);
+    console.log(`Model: ${model}`);
+    console.log('Tests that cache_control fields are stripped from all block types');
+    console.log('='.repeat(60));
+    console.log('');
+
+    let allPassed = true;
+    const results = [];
+    const modelConfig = getModelConfig(family);
+
+    // ===== TEST 1: User text block with cache_control =====
+    console.log('TEST 1: User text block with cache_control');
+    console.log('-'.repeat(40));
+
+    try {
+        const test1Result = await streamRequest({
+            model,
+            max_tokens: modelConfig.max_tokens,
+            stream: true,
+            thinking: modelConfig.thinking,
+            messages: [
+                {
+                    role: 'user',
+                    content: [
+                        {
+                            type: 'text',
+                            text: 'What is the capital of France? Reply in one word.',
+                            cache_control: { type: 'ephemeral' }
+                        }
+                    ]
+                }
+            ]
+        });
+
+        const hasError1 = test1Result.events.some(e => e.type === 'error');
+        const errorMsg1 = hasError1
+            ? test1Result.events.find(e => e.type === 'error')?.data?.error?.message
+            : null;
+
+        console.log(`  Response received: ${test1Result.content.length > 0 ? 'YES' : 'NO'}`);
+        console.log(`  Has error: ${hasError1 ? 'YES' : 'NO'}`);
+        if (hasError1) {
+            console.log(`  Error message: ${errorMsg1}`);
+        }
+
+        const content1 = analyzeContent(test1Result.content);
+        if (content1.hasText) {
+            console.log(`  Response preview: "${content1.text[0].text.substring(0, 50)}..."`);
+        }
+
+        const test1Pass = !hasError1 && test1Result.content.length > 0;
+        results.push({ name: 'User text block with cache_control', passed: test1Pass });
+        console.log(`  Result: ${test1Pass ? 'PASS' : 'FAIL'}`);
+        if (!test1Pass) allPassed = false;
+    } catch (err) {
+        console.log(`  ERROR: ${err.message}`);
+        results.push({ name: 'User text block with cache_control', passed: false });
+        allPassed = false;
+    }
+
+    // ===== TEST 2: Multi-turn with cache_control on assistant content =====
+    console.log('\nTEST 2: Multi-turn with cache_control on assistant content');
+    console.log('-'.repeat(40));
+
+    try {
+        // First turn - get a response
+        const turn1 = await streamRequest({
+            model,
+            max_tokens: modelConfig.max_tokens,
+            stream: true,
+            thinking: modelConfig.thinking,
+            messages: [
+                { role: 'user', content: 'Say hello.' }
+            ]
+        });
+
+        if (turn1.content.length === 0) {
+            console.log('  SKIPPED - Turn 1 returned empty response');
+            results.push({ name: 'Multi-turn with cache_control', passed: false, skipped: true });
+        } else {
+            // Add cache_control to ALL blocks in assistant response (simulating Claude Code)
+            const modifiedContent = turn1.content.map(block => ({
+                ...block,
+                cache_control: { type: 'ephemeral' }
+            }));
+
+            // Second turn - use modified content with cache_control
+            const turn2 = await streamRequest({
+                model,
+                max_tokens: modelConfig.max_tokens,
+                stream: true,
+                thinking: modelConfig.thinking,
+                messages: [
+                    { role: 'user', content: 'Say hello.' },
+                    { role: 'assistant', content: modifiedContent },
+                    {
+                        role: 'user',
+                        content: [
+                            {
+                                type: 'text',
+                                text: 'Now say goodbye.',
+                                cache_control: { type: 'ephemeral' }
+                            }
+                        ]
+                    }
+                ]
+            });
+
+            const hasError2 = turn2.events.some(e => e.type === 'error');
+            const errorMsg2 = hasError2
+                ? turn2.events.find(e => e.type === 'error')?.data?.error?.message
+                : null;
+
+            console.log(`  Turn 1 blocks: ${turn1.content.length}`);
+            console.log(`  Turn 2 response received: ${turn2.content.length > 0 ? 'YES' : 'NO'}`);
+            console.log(`  Has error: ${hasError2 ? 'YES' : 'NO'}`);
+            if (hasError2) {
+                console.log(`  Error message: ${errorMsg2}`);
+                // Check specifically for cache_control error
+                if (errorMsg2 && errorMsg2.includes('cache_control')) {
+                    console.log('  >>> cache_control field NOT stripped properly! <<<');
+                }
+            }
+
+            const content2 = analyzeContent(turn2.content);
+            if (content2.hasText) {
+                console.log(`  Response preview: "${content2.text[0].text.substring(0, 50)}..."`);
+            }
+
+            const test2Pass = !hasError2 && turn2.content.length > 0;
+            results.push({ name: 'Multi-turn with cache_control', passed: test2Pass });
+            console.log(`  Result: ${test2Pass ? 'PASS' : 'FAIL'}`);
+            if (!test2Pass) allPassed = false;
+        }
+    } catch (err) {
+        console.log(`  ERROR: ${err.message}`);
+        results.push({ name: 'Multi-turn with cache_control', passed: false });
+        allPassed = false;
+    }
+
+    // ===== TEST 3: Tool loop with cache_control on tool_result =====
+    console.log('\nTEST 3: Tool loop with cache_control on tool_result');
+    console.log('-'.repeat(40));
+
+    try {
+        // First turn - request tool use
+        const toolTurn1 = await streamRequest({
+            model,
+            max_tokens: modelConfig.max_tokens,
+            stream: true,
+            tools,
+            thinking: modelConfig.thinking,
+            messages: [
+                { role: 'user', content: 'What is the weather in Tokyo? Use the get_weather tool.' }
+            ]
+        });
+
+        const content3a = analyzeContent(toolTurn1.content);
+
+        if (!content3a.hasToolUse) {
+            console.log('  SKIPPED - Model did not use tool in turn 1');
+            results.push({ name: 'Tool_result with cache_control', passed: true, skipped: true });
+        } else {
+            const toolUseId = content3a.toolUse[0].id;
+            console.log(`  Tool use ID: ${toolUseId}`);
+
+            // Second turn - provide tool result with cache_control
+            const toolTurn2 = await streamRequest({
+                model,
+                max_tokens: modelConfig.max_tokens,
+                stream: true,
+                tools,
+                thinking: modelConfig.thinking,
+                messages: [
+                    { role: 'user', content: 'What is the weather in Tokyo? Use the get_weather tool.' },
+                    { role: 'assistant', content: toolTurn1.content },
+                    {
+                        role: 'user',
+                        content: [
+                            {
+                                type: 'tool_result',
+                                tool_use_id: toolUseId,
+                                content: 'The weather in Tokyo is 22°C and partly cloudy.',
+                                cache_control: { type: 'ephemeral' }
+                            }
+                        ]
+                    }
+                ]
+            });
+
+            const hasError3 = toolTurn2.events.some(e => e.type === 'error');
+            const errorMsg3 = hasError3
+                ? toolTurn2.events.find(e => e.type === 'error')?.data?.error?.message
+                : null;
+
+            console.log(`  Turn 2 response received: ${toolTurn2.content.length > 0 ? 'YES' : 'NO'}`);
+            console.log(`  Has error: ${hasError3 ? 'YES' : 'NO'}`);
+            if (hasError3) {
+                console.log(`  Error message: ${errorMsg3}`);
+                if (errorMsg3 && errorMsg3.includes('cache_control')) {
+                    console.log('  >>> cache_control field NOT stripped properly! <<<');
+                }
+            }
+
+            const content3b = analyzeContent(toolTurn2.content);
+            if (content3b.hasText) {
+                console.log(`  Response preview: "${content3b.text[0].text.substring(0, 50)}..."`);
+            }
+
+            const test3Pass = !hasError3 && toolTurn2.content.length > 0;
+            results.push({ name: 'Tool_result with cache_control', passed: test3Pass });
+            console.log(`  Result: ${test3Pass ? 'PASS' : 'FAIL'}`);
+            if (!test3Pass) allPassed = false;
+        }
+    } catch (err) {
+        console.log(`  ERROR: ${err.message}`);
+        results.push({ name: 'Tool_result with cache_control', passed: false });
+        allPassed = false;
+    }
+
+    // ===== Summary =====
+    console.log('\n' + '='.repeat(60));
+    console.log(`SUMMARY [${family.toUpperCase()}]`);
+    console.log('='.repeat(60));
+
+    for (const result of results) {
+        const status = result.skipped ? 'SKIP' : (result.passed ? 'PASS' : 'FAIL');
+        console.log(`  [${status}] ${result.name}`);
+    }
+
+    const passedCount = results.filter(r => r.passed && !r.skipped).length;
+    const skippedCount = results.filter(r => r.skipped).length;
+    const totalTests = results.length - skippedCount;
+
+    console.log('\n' + '='.repeat(60));
+    console.log(`[${family.toUpperCase()}] ${allPassed ? 'ALL TESTS PASSED' : 'SOME TESTS FAILED'} (${passedCount}/${totalTests})`);
+    console.log('='.repeat(60));
+
+    return allPassed;
+}
+
+async function runTests() {
+    console.log('');
+    console.log('='.repeat(60));
+    console.log('CACHE CONTROL FIELD STRIPPING TEST (Issue #189)');
+    console.log('='.repeat(60));
+    console.log('');
+    console.log('This test verifies that cache_control fields are properly');
+    console.log('stripped from all content blocks before sending to Cloud Code API.');
+    console.log('');
+
+    const models = await getTestModels();
+    let allPassed = true;
+
+    for (const { family, model } of models) {
+        console.log('\n');
+        const passed = await runTestsForModel(family, model);
+        if (!passed) allPassed = false;
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log('FINAL RESULT');
+    console.log('='.repeat(60));
+    console.log(`Overall: ${allPassed ? 'ALL MODEL FAMILIES PASSED' : 'SOME MODEL FAMILIES FAILED'}`);
+    console.log('='.repeat(60));
+
+    process.exit(allPassed ? 0 : 1);
+}
+
+runTests().catch(err => {
+    console.error('Test failed with error:', err);
+    process.exit(1);
+});
--- a/tests/test-strategies.cjs
+++ b/tests/test-strategies.cjs
@@ -77,6 +77,12 @@ async function runTests() {
        }
    }

+    function assertNotNull(value, message = '') {
+        if (value === null || value === undefined) {
+            throw new Error(`${message}\nExpected non-null value but got: ${value}`);
+        }
+    }
+
    function assertWithin(actual, min, max, message = '') {
        if (actual < min || actual > max) {
            throw new Error(`${message}\nExpected value between ${min} and ${max}, got: ${actual}`);
@@ -691,7 +697,7 @@ async function runTests() {
        assertEqual(result.account.email, 'account3@example.com', 'Oldest account should be selected');
    });

-    test('HybridStrategy: filters out unhealthy accounts', () => {
+    test('HybridStrategy: uses emergency fallback for unhealthy accounts', () => {
        const strategy = new HybridStrategy({
            healthScore: { initial: 40, minUsable: 50 },
            tokenBucket: { initialTokens: 50, maxTokens: 50 }
@@ -699,19 +705,25 @@ async function runTests() {
        const accounts = createMockAccounts(3);

        // All accounts start with health 40, which is below minUsable 50
+        // But emergency fallback should still return an account
        const result = strategy.selectAccount(accounts, 'model');
-        assertNull(result.account, 'Should filter all accounts with low health');
+        assertNotNull(result.account, 'Emergency fallback should return an account');
+        // waitMs indicates fallback was used (250ms for emergency)
+        assertTrue(result.waitMs >= 250, 'Emergency fallback should add throttle delay');
    });

-    test('HybridStrategy: filters out accounts without tokens', () => {
+    test('HybridStrategy: uses last resort fallback for accounts without tokens', () => {
        const strategy = new HybridStrategy({
            healthScore: { initial: 70 },
            tokenBucket: { initialTokens: 0, maxTokens: 50 }
        });
        const accounts = createMockAccounts(3);

+        // No tokens, but last resort fallback should still return an account
        const result = strategy.selectAccount(accounts, 'model');
-        assertNull(result.account, 'Should filter all accounts without tokens');
+        assertNotNull(result.account, 'Last resort fallback should return an account');
+        // waitMs indicates fallback was used (500ms for lastResort)
+        assertTrue(result.waitMs >= 500, 'Last resort fallback should add throttle delay');
    });

    test('HybridStrategy: consumes token on selection', () => {
@@ -993,7 +1005,7 @@ async function runTests() {
        assertEqual(result.account.email, 'account2@example.com');
    });

-    test('Integration: Token consumption limits requests', () => {
+    test('Integration: Token exhaustion triggers last resort fallback', () => {
        const strategy = new HybridStrategy({
            tokenBucket: { initialTokens: 2, maxTokens: 10 }
        });
@@ -1003,9 +1015,11 @@ async function runTests() {
        strategy.selectAccount(accounts, 'model'); // 2 -> 1
        strategy.selectAccount(accounts, 'model'); // 1 -> 0

-        // Third request should fail (no tokens)
+        // Third request should use last resort fallback (not null)
        const result = strategy.selectAccount(accounts, 'model');
-        assertNull(result.account, 'Should return null when tokens exhausted');
+        assertNotNull(result.account, 'Last resort fallback should return an account');
+        // waitMs indicates fallback was used (500ms for lastResort)
+        assertTrue(result.waitMs >= 500, 'Last resort fallback should add throttle delay');
    });

    test('Integration: Multi-model rate limiting is independent', () => {