feat: add configurable account selection strategies
Refactor account selection into a strategy pattern with three options:

- Sticky: cache-optimized, stays on the same account until rate-limited
- Round-robin: load-balanced, rotates on every request
- Hybrid (default): smart distribution using health scores, token buckets, and LRU

The hybrid strategy uses multiple signals for optimal account selection: health tracking for reliability, client-side token buckets for rate limiting, and LRU freshness to prefer rested accounts.

Includes WebUI settings for strategy selection and unit tests.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -237,6 +237,18 @@ window.translations.en = {
|
||||
defaultCooldownDesc: "Fallback cooldown when API doesn't provide a reset time.",
|
||||
maxWaitThreshold: "Max Wait Before Error",
|
||||
maxWaitDesc: "If all accounts are rate-limited longer than this, error immediately instead of waiting.",
|
||||
// Error Handling Tuning
|
||||
errorHandlingTuning: "Error Handling Tuning",
|
||||
rateLimitDedupWindow: "Rate Limit Dedup Window",
|
||||
rateLimitDedupWindowDesc: "Prevents concurrent retry storms when multiple requests hit rate limits simultaneously.",
|
||||
maxConsecutiveFailures: "Max Consecutive Failures",
|
||||
maxConsecutiveFailuresDesc: "Number of consecutive failures before applying extended cooldown to an account.",
|
||||
extendedCooldown: "Extended Cooldown",
|
||||
extendedCooldownDesc: "Cooldown duration applied after max consecutive failures reached.",
|
||||
capacityRetryDelay: "Capacity Retry Delay",
|
||||
capacityRetryDelayDesc: "Delay before retrying when model capacity is exhausted (not quota).",
|
||||
maxCapacityRetries: "Max Capacity Retries",
|
||||
maxCapacityRetriesDesc: "Maximum retries for capacity exhaustion before switching accounts.",
|
||||
saveConfigServer: "Save Configuration",
|
||||
serverRestartAlert: "Changes saved to {path}. Restart server to apply some settings.",
|
||||
changePassword: "Change WebUI Password",
|
||||
@@ -318,6 +330,18 @@ window.translations.en = {
|
||||
failedToUpdateModelConfig: "Failed to update model config",
|
||||
fieldUpdated: "{displayName} updated to {value}",
|
||||
failedToUpdateField: "Failed to update {displayName}",
|
||||
// Account Selection Strategy
|
||||
accountSelectionStrategy: "Account Selection Strategy",
|
||||
selectionStrategy: "Selection Strategy",
|
||||
strategyStickyLabel: "Sticky (Cache Optimized)",
|
||||
strategyRoundRobinLabel: "Round Robin (Load Balanced)",
|
||||
strategyHybridLabel: "Hybrid (Smart Distribution)",
|
||||
strategyStickyDesc: "Stays on same account until rate-limited. Best for prompt caching.",
|
||||
strategyRoundRobinDesc: "Rotates to next account on every request. Maximum throughput.",
|
||||
strategyHybridDesc: "Smart selection based on health, tokens, and freshness.",
|
||||
strategyUpdated: "Strategy updated to: {strategy}",
|
||||
failedToUpdateStrategy: "Failed to update strategy",
|
||||
invalidStrategy: "Invalid strategy selected",
|
||||
// Validation Messages
|
||||
mustBeValidNumber: "{fieldName} must be a valid number",
|
||||
mustBeAtLeast: "{fieldName} must be at least {min}",
|
||||
|
||||
Reference in New Issue
Block a user