feat: add configurable account selection strategies

Refactor account selection into a strategy pattern with three options:
- Sticky: cache-optimized, stays on same account until rate-limited
- Round-robin: load-balanced, rotates every request
- Hybrid (default): smart distribution using health scores, token buckets, and LRU

The hybrid strategy combines several signals for optimal account selection:
health tracking for reliability, client-side token buckets for rate limiting,
and least-recently-used (LRU) freshness to prefer rested accounts.

Adds WebUI settings for strategy selection, plus unit tests.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-18 03:48:43 +05:30
parent 973234372b
commit 5ae19a5b72
31 changed files with 2721 additions and 353 deletions

View File

@@ -237,6 +237,18 @@ window.translations.en = {
defaultCooldownDesc: "Fallback cooldown when API doesn't provide a reset time.",
maxWaitThreshold: "Max Wait Before Error",
maxWaitDesc: "If all accounts are rate-limited longer than this, error immediately instead of waiting.",
// Error Handling Tuning
errorHandlingTuning: "Error Handling Tuning",
rateLimitDedupWindow: "Rate Limit Dedup Window",
rateLimitDedupWindowDesc: "Prevents concurrent retry storms when multiple requests hit rate limits simultaneously.",
maxConsecutiveFailures: "Max Consecutive Failures",
maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
extendedCooldown: "Extended Cooldown",
extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
capacityRetryDelay: "Capacity Retry Delay",
capacityRetryDelayDesc: "Delay before retrying when model capacity is exhausted (not quota).",
maxCapacityRetries: "Max Capacity Retries",
maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
saveConfigServer: "Save Configuration",
serverRestartAlert: "Changes saved to {path}. Restart server to apply some settings.",
changePassword: "Change WebUI Password",
@@ -318,6 +330,18 @@ window.translations.en = {
failedToUpdateModelConfig: "Failed to update model config",
fieldUpdated: "{displayName} updated to {value}",
failedToUpdateField: "Failed to update {displayName}",
// Account Selection Strategy
accountSelectionStrategy: "Account Selection Strategy",
selectionStrategy: "Selection Strategy",
strategyStickyLabel: "Sticky (Cache Optimized)",
strategyRoundRobinLabel: "Round Robin (Load Balanced)",
strategyHybridLabel: "Hybrid (Smart Distribution)",
strategyStickyDesc: "Stays on same account until rate-limited. Best for prompt caching.",
strategyRoundRobinDesc: "Rotates to next account on every request. Maximum throughput.",
strategyHybridDesc: "Smart selection based on health, tokens, and freshness.",
strategyUpdated: "Strategy updated to: {strategy}",
failedToUpdateStrategy: "Failed to update strategy",
invalidStrategy: "Invalid strategy selected",
// Validation Messages
mustBeValidNumber: "{fieldName} must be a valid number",
mustBeAtLeast: "{fieldName} must be at least {min}",