feat: add configurable account selection strategies

Refactor account selection into a strategy pattern with three options:
- Sticky: cache-optimized, stays on same account until rate-limited
- Round-robin: load-balanced, rotates every request
- Hybrid (default): smart distribution using health scores, token buckets, and LRU

The hybrid strategy uses multiple signals for optimal account selection:
health tracking for reliability, client-side token buckets for rate limiting,
and LRU freshness to prefer rested accounts.

Includes WebUI settings for strategy selection and unit tests.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-18 03:48:43 +05:30
parent 973234372b
commit 5ae19a5b72
31 changed files with 2721 additions and 353 deletions

View File

@@ -103,9 +103,24 @@ export const MAX_ACCOUNTS = config?.maxAccounts || 10; // From config or 10
// NOTE: every config-backed constant below falls back with `??` rather than
// `||`, so the default applies only when the setting is missing
// (null/undefined). An explicit 0 in the config (e.g. "no retries",
// "no cooldown") is honored instead of being silently replaced.
// `config` is defined outside this section of the module.

// Rate limit wait thresholds
export const MAX_WAIT_BEFORE_ERROR_MS = config?.maxWaitBeforeErrorMs ?? 120000; // From config or 2 minutes
// Gap 1: Retry deduplication - prevents thundering herd on concurrent rate limits
export const RATE_LIMIT_DEDUP_WINDOW_MS = config?.rateLimitDedupWindowMs ?? 5000; // From config or 5 seconds
// Gap 2: Consecutive failure tracking - extended cooldown after repeated failures
export const MAX_CONSECUTIVE_FAILURES = config?.maxConsecutiveFailures ?? 3; // From config or 3
export const EXTENDED_COOLDOWN_MS = config?.extendedCooldownMs ?? 60000; // From config or 1 minute
// Gap 4: Capacity exhaustion - shorter retry for model capacity issues (not quota)
export const CAPACITY_RETRY_DELAY_MS = config?.capacityRetryDelayMs ?? 2000; // From config or 2 seconds
export const MAX_CAPACITY_RETRIES = config?.maxCapacityRetries ?? 3; // From config or 3
// Thinking model constants
export const MIN_SIGNATURE_LENGTH = 50; // Minimum valid thinking signature length
// Account selection strategies
// Accepted strategy identifiers; DEFAULT_SELECTION_STRATEGY is one of these.
export const SELECTION_STRATEGIES = ['sticky', 'round-robin', 'hybrid'];
export const DEFAULT_SELECTION_STRATEGY = 'hybrid';
// Gemini-specific limits
export const GEMINI_MAX_OUTPUT_TOKENS = 16384;
@@ -235,6 +250,11 @@ export default {
MAX_EMPTY_RESPONSE_RETRIES,
MAX_ACCOUNTS,
MAX_WAIT_BEFORE_ERROR_MS,
RATE_LIMIT_DEDUP_WINDOW_MS,
MAX_CONSECUTIVE_FAILURES,
EXTENDED_COOLDOWN_MS,
CAPACITY_RETRY_DELAY_MS,
MAX_CAPACITY_RETRIES,
MIN_SIGNATURE_LENGTH,
GEMINI_MAX_OUTPUT_TOKENS,
GEMINI_SKIP_SIGNATURE,