Files
antigravity-claude-proxy/tests/stress-test.cjs
Badri Narayanan S 5a85f0cfcc feat: comprehensive rate limit handling overhaul (inspired by opencode-antigravity-auth)
This commit addresses "Max retries exceeded" errors during stress testing where
all accounts would become exhausted simultaneously due to short per-second rate
limits triggering cascading failures.

## Rate Limit Parser (`rate-limit-parser.js`)
- Remove 2s buffer enforcement that caused cascading failures when API returned
  short reset times (200-600ms). Now adds 200ms buffer for sub-500ms resets
- Add `parseRateLimitReason()` for smart backoff based on error type:
  QUOTA_EXHAUSTED, RATE_LIMIT_EXCEEDED, MODEL_CAPACITY_EXHAUSTED, SERVER_ERROR

## Message/Streaming Handlers
- Add per-account+model rate limit state tracking with exponential backoff
- For short rate limits (< 1 second), wait and retry on same account instead
  of switching - prevents thundering herd when all accounts hit per-second limits
- Add throttle wait support for fallback modes (emergency/lastResort)
- Add `calculateSmartBackoff()` with progressive tiers by error type

## HybridStrategy (`hybrid-strategy.js`)
- Refactor `#getCandidates()` to return 4 fallback levels:
  - `normal`: All filters pass (health, tokens, quota)
  - `quota`: Bypass critical quota check
  - `emergency`: Bypass health check when ALL accounts unhealthy
  - `lastResort`: Bypass BOTH health AND token bucket checks
- Add throttle wait times: 500ms for lastResort, 250ms for emergency
- Fix LRU calculation to use seconds (matches opencode-antigravity-auth)

## Health Tracker
- Increase `recoveryPerHour` from 2 to 10 for faster recovery (1 hour vs 5 hours)

## Account Manager
- Add consecutive failure tracking: `getConsecutiveFailures()`,
  `incrementConsecutiveFailures()`, `resetConsecutiveFailures()`
- Add cooldown mechanism separate from rate limits with `CooldownReason`
- Reset consecutive failures on successful request

## Base Strategy
- Add `isAccountCoolingDown()` check in `isAccountUsable()`

## Constants
- Replace fixed `CAPACITY_RETRY_DELAY_MS` with progressive `CAPACITY_BACKOFF_TIERS_MS`
- Add `BACKOFF_BY_ERROR_TYPE` for smart backoff
- Add `QUOTA_EXHAUSTED_BACKOFF_TIERS_MS` for progressive quota backoff
- Add `MIN_BACKOFF_MS` floor to prevent "Available in 0s" loops
- Increase `MAX_CAPACITY_RETRIES` from 3 to 5
- Reduce `RATE_LIMIT_DEDUP_WINDOW_MS` from 5s to 2s

## Frontend
- Remove `capacityRetryDelayMs` config (replaced by progressive tiers)
- Update default `maxCapacityRetries` display from 3 to 5

## Testing
- Add `tests/stress-test.cjs` for concurrent request stress testing

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-24 22:43:53 +05:30

94 lines
3.2 KiB
JavaScript

/**
 * Stress Test - Send multiple concurrent requests to test rate limit handling
 *
 * Usage: node tests/stress-test.cjs [count] [model]
 * Example: node tests/stress-test.cjs 10 gemini-3-flash
 */

// Proxy endpoint under test; override with the ANTHROPIC_BASE_URL env var.
const BASE_URL = process.env.ANTHROPIC_BASE_URL || 'http://localhost:8080';
// Number of concurrent requests (default 8). Explicit radix 10 avoids
// legacy octal/hex parsing surprises; NaN (missing or non-numeric arg)
// and 0 both fall back to the default via ||, matching prior behavior.
const count = Number.parseInt(process.argv[2], 10) || 8;
// Target model name for every request.
const model = process.argv[3] || 'gemini-3-flash';
/**
 * Fire one POST at the proxy's /v1/messages endpoint and report the outcome.
 *
 * Logs a one-line result (status, latency, truncated body) and never throws:
 * HTTP failures and transport errors are both folded into the result record.
 *
 * @param {number} id - Request identifier, used in logging and the prompt text.
 * @returns {Promise<{id: number, success: boolean, status?: number, error?: string, elapsed: number}>}
 */
async function sendRequest(id) {
  const begunAt = Date.now();
  const payload = {
    model: model,
    max_tokens: 100,
    messages: [
      { role: 'user', content: `Request ${id}: Say "Hello ${id}" and nothing else.` }
    ]
  };
  try {
    const res = await fetch(`${BASE_URL}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01'
      },
      body: JSON.stringify(payload)
    });
    const elapsed = Date.now() - begunAt;
    if (res.ok) {
      const data = await res.json();
      const text = data.content?.[0]?.text?.substring(0, 50) || 'No text';
      console.log(`[${id}] ✅ 200 after ${elapsed}ms: "${text}..."`);
      return { id, success: true, status: 200, elapsed };
    }
    const errorText = await res.text();
    console.log(`[${id}] ❌ ${res.status} after ${elapsed}ms: ${errorText.substring(0, 100)}`);
    return { id, success: false, status: res.status, elapsed };
  } catch (error) {
    const elapsed = Date.now() - begunAt;
    console.log(`[${id}] ❌ Error after ${elapsed}ms: ${error.message}`);
    return { id, success: false, error: error.message, elapsed };
  }
}
/**
 * Launch `count` requests concurrently against the proxy and print a summary:
 * total wall time, success/failure counts, average latency, and — when any
 * request failed — a breakdown of failures keyed by HTTP status.
 */
async function runStressTest() {
  console.log(`\n🚀 Stress Test: Sending ${count} concurrent requests to ${model}\n`);
  console.log(`Target: ${BASE_URL}/v1/messages\n`);
  console.log('─'.repeat(70));

  const startedAt = Date.now();
  // Launch everything at once — the point is concurrency pressure, not pacing.
  const inFlight = Array.from({ length: count }, (_, i) => sendRequest(i + 1));
  const results = await Promise.all(inFlight);
  const wallTime = Date.now() - startedAt;

  console.log('─'.repeat(70));

  const successful = results.filter((r) => r.success).length;
  const failed = results.length - successful;
  const avgElapsed = Math.round(
    results.reduce((sum, r) => sum + r.elapsed, 0) / results.length
  );

  console.log(`\n📊 Summary:`);
  console.log(` Total time: ${wallTime}ms`);
  console.log(` Successful: ${successful}/${count}`);
  console.log(` Failed: ${failed}/${count}`);
  console.log(` Avg response time: ${avgElapsed}ms`);

  if (failed > 0) {
    // Tally failures by HTTP status; transport errors have no status
    // and are bucketed under 'network'.
    const statusCounts = {};
    for (const r of results) {
      if (r.success) continue;
      const key = r.status || 'network';
      statusCounts[key] = (statusCounts[key] || 0) + 1;
    }
    console.log(` Error breakdown: ${JSON.stringify(statusCounts)}`);
  }
  console.log('');
}

runStressTest().catch(console.error);