Selective fixes from PR #35: Model-specific rate limits & robustness improvements (#37)

* feat: apply local user changes and fixes * ;D * Implement OpenAI support, model-specific rate limiting, and robustness fixes * docs: update pr title * feat: ensure unique openai models endpoint * fix: startup banner alignment and removed duplicates * feat: add model fallback system with --fallback flag * fix: accounts cli hanging after completion * feat: add exit option to accounts cli menu * fix: remove circular dependency warning for fallback flag * feat: show active modes in banner and hide their flags * Remove OpenAI compatibility and fallback features from PR #35 Cherry-picked selective fixes from PR #35 while removing: - OpenAI-compatible API endpoints (/openai/v1/*) - Model fallback system (fallback-config.js) - Thinking block skip for Gemini models - Unnecessary files (pullrequest.md, test-fix.js, test-openai.js) Retained improvements: - Network error handling with retry logic - Model-specific rate limiting - Enhanced health check with quota info - CLI fixes (exit option, process.exit) - Startup banner alignment (debug mode only) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * banner alignment fix * Refactor: Model-specific rate limits and cleanup deprecated code - Remove global rate limit fields (isRateLimited, rateLimitResetTime) in favor of model-specific limits (modelRateLimits[modelId]) - Remove deprecated wrapper functions (is429Error, isAuthInvalidError) from handlers - Filter fetchAvailableModels to only return Claude and Gemini models - Fix getCurrentStickyAccount() to pass model param after waiting - Update /account-limits endpoint to show model-specific limits - Remove multi-account OAuth flow to avoid state mismatch errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * fix: show (x/y) limited status in account-limits table - Status is now "ok" only when all models are available - Shows "(x/y) limited" when x out of y models are exhausted - Provides better visibility into partial rate limiting 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * docs: update CLAUDE.md with model-specific rate limiting - Document modelRateLimits[modelId] for per-model rate tracking - Add isNetworkError() helper to utilities section 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> --------- Co-authored-by: M1noa <minoa@minoa.cat> Co-authored-by: Minoa <altgithub@minoa.cat> Co-authored-by: Claude <noreply@anthropic.com>
2026-01-03 15:33:49 +05:30
parent 2d05dd5b62
commit 9c4a712a9a
15 changed files with 474 additions and 194 deletions
--- a/src/server.js
+++ b/src/server.js
@@ -70,8 +70,9 @@ function parseError(error) {
        statusCode = 400;  // Use 400 to ensure client does not retry (429 and 529 trigger retries)

        // Try to extract the quota reset time from the error
-        const resetMatch = error.message.match(/quota will reset after (\d+h\d+m\d+s|\d+m\d+s|\d+s)/i);
-        const modelMatch = error.message.match(/"model":\s*"([^"]+)"/);
+        const resetMatch = error.message.match(/quota will reset after ([\dh\dm\ds]+)/i);
+        // Try to extract model from our error format "Rate limited on <model>" or JSON format
+        const modelMatch = error.message.match(/Rate limited on ([^.]+)\./) || error.message.match(/"model":\s*"([^"]+)"/);
        const model = modelMatch ? modelMatch[1] : 'the model';

        if (resetMatch) {
@@ -111,22 +112,107 @@ app.use((req, res, next) => {
 });

 /**
- * Health check endpoint
+ * Health check endpoint - Detailed status
+ * Returns status of all accounts including rate limits and model quotas
 */
 app.get('/health', async (req, res) => {
    try {
        await ensureInitialized();
+        const start = Date.now();
+        
+        // Get high-level status first
        const status = accountManager.getStatus();
+        const allAccounts = accountManager.getAllAccounts();
+        
+        // Fetch quotas for each account in parallel to get detailed model info
+        const accountDetails = await Promise.allSettled(
+            allAccounts.map(async (account) => {
+                // Check model-specific rate limits
+                const activeModelLimits = Object.entries(account.modelRateLimits || {})
+                    .filter(([_, limit]) => limit.isRateLimited && limit.resetTime > Date.now());
+                const isRateLimited = activeModelLimits.length > 0;
+                const soonestReset = activeModelLimits.length > 0
+                    ? Math.min(...activeModelLimits.map(([_, l]) => l.resetTime))
+                    : null;
+
+                const baseInfo = {
+                    email: account.email,
+                    lastUsed: account.lastUsed ? new Date(account.lastUsed).toISOString() : null,
+                    modelRateLimits: account.modelRateLimits || {},
+                    rateLimitCooldownRemaining: soonestReset ? Math.max(0, soonestReset - Date.now()) : 0
+                };
+
+                // Skip invalid accounts for quota check
+                if (account.isInvalid) {
+                    return {
+                        ...baseInfo,
+                        status: 'invalid',
+                        error: account.invalidReason,
+                        models: {}
+                    };
+                }
+
+                try {
+                    const token = await accountManager.getTokenForAccount(account);
+                    const quotas = await getModelQuotas(token);
+
+                    // Format quotas for readability
+                    const formattedQuotas = {};
+                    for (const [modelId, info] of Object.entries(quotas)) {
+                        formattedQuotas[modelId] = {
+                            remaining: info.remainingFraction !== null ? `${Math.round(info.remainingFraction * 100)}%` : 'N/A',
+                            remainingFraction: info.remainingFraction,
+                            resetTime: info.resetTime || null
+                        };
+                    }
+
+                    return {
+                        ...baseInfo,
+                        status: isRateLimited ? 'rate-limited' : 'ok',
+                        models: formattedQuotas
+                    };
+                } catch (error) {
+                    return {
+                        ...baseInfo,
+                        status: 'error',
+                        error: error.message,
+                        models: {}
+                    };
+                }
+            })
+        );
+
+        // Process results
+        const detailedAccounts = accountDetails.map((result, index) => {
+            if (result.status === 'fulfilled') {
+                return result.value;
+            } else {
+                const acc = allAccounts[index];
+                return {
+                    email: acc.email,
+                    status: 'error',
+                    error: result.reason?.message || 'Unknown error',
+                    modelRateLimits: acc.modelRateLimits || {}
+                };
+            }
+        });

        res.json({
            status: 'ok',
-            accounts: status.summary,
-            available: status.available,
-            rateLimited: status.rateLimited,
-            invalid: status.invalid,
-            timestamp: new Date().toISOString()
+            timestamp: new Date().toISOString(),
+            latencyMs: Date.now() - start,
+            summary: status.summary,
+            counts: {
+                total: status.total,
+                available: status.available,
+                rateLimited: status.rateLimited,
+                invalid: status.invalid
+            },
+            accounts: detailedAccounts
        });
+
    } catch (error) {
+        logger.error('[API] Health check failed:', error);
        res.status(503).json({
            status: 'error',
            error: error.message,
@@ -236,11 +322,21 @@ app.get('/account-limits', async (req, res) => {
                let accStatus;
                if (acc.isInvalid) {
                    accStatus = 'invalid';
-                } else if (acc.isRateLimited) {
-                    const remaining = acc.rateLimitResetTime ? acc.rateLimitResetTime - Date.now() : 0;
-                    accStatus = remaining > 0 ? `limited (${formatDuration(remaining)})` : 'rate-limited';
+                } else if (accLimit?.status === 'error') {
+                    accStatus = 'error';
                } else {
-                    accStatus = accLimit?.status || 'ok';
+                    // Count exhausted models (0% or null remaining)
+                    const models = accLimit?.models || {};
+                    const modelCount = Object.keys(models).length;
+                    const exhaustedCount = Object.values(models).filter(
+                        q => q.remainingFraction === 0 || q.remainingFraction === null
+                    ).length;
+
+                    if (exhaustedCount === 0) {
+                        accStatus = 'ok';
+                    } else {
+                        accStatus = `(${exhaustedCount}/${modelCount}) limited`;
+                    }
                }

                // Get reset time from quota API
@@ -262,14 +358,14 @@ app.get('/account-limits', async (req, res) => {
            }
            lines.push('');

-            // Calculate column widths
-            const modelColWidth = Math.max(25, ...sortedModels.map(m => m.length)) + 2;
-            const accountColWidth = 22;
+            // Calculate column widths - need more space for reset time info
+            const modelColWidth = Math.max(28, ...sortedModels.map(m => m.length)) + 2;
+            const accountColWidth = 30;

            // Header row
            let header = 'Model'.padEnd(modelColWidth);
            for (const acc of accountLimits) {
-                const shortEmail = acc.email.split('@')[0].slice(0, 18);
+                const shortEmail = acc.email.split('@')[0].slice(0, 26);
                header += shortEmail.padEnd(accountColWidth);
            }
            lines.push(header);
@@ -281,12 +377,22 @@ app.get('/account-limits', async (req, res) => {
                for (const acc of accountLimits) {
                    const quota = acc.models?.[modelId];
                    let cell;
-                    if (acc.status !== 'ok') {
+                    if (acc.status !== 'ok' && acc.status !== 'rate-limited') {
                        cell = `[${acc.status}]`;
                    } else if (!quota) {
                        cell = '-';
-                    } else if (quota.remainingFraction === null) {
-                        cell = '0% (exhausted)';
+                    } else if (quota.remainingFraction === 0 || quota.remainingFraction === null) {
+                        // Show reset time for exhausted models
+                        if (quota.resetTime) {
+                            const resetMs = new Date(quota.resetTime).getTime() - Date.now();
+                            if (resetMs > 0) {
+                                cell = `0% (wait ${formatDuration(resetMs)})`;
+                            } else {
+                                cell = '0% (resetting...)';
+                            }
+                        } else {
+                            cell = '0% (exhausted)';
+                        }
                    } else {
                        const pct = Math.round(quota.remainingFraction * 100);
                        cell = `${pct}%`;
@@ -404,17 +510,17 @@ app.post('/v1/messages/count_tokens', (req, res) => {
 /**
 * Main messages endpoint - Anthropic Messages API compatible
 */
+
+
+/**
+ * Anthropic-compatible Messages API
+ * POST /v1/messages
+ */
 app.post('/v1/messages', async (req, res) => {
    try {
        // Ensure account manager is initialized
        await ensureInitialized();

-        // Optimistic Retry: If ALL accounts are rate-limited, reset them to force a fresh check.
-        // If we have some available accounts, we try them first.
-        if (accountManager.isAllRateLimited()) {
-            logger.warn('[Server] All accounts rate-limited. Resetting state for optimistic retry.');
-            accountManager.resetAllRateLimits();
-        }

        const {
            model,
@@ -430,6 +536,14 @@ app.post('/v1/messages', async (req, res) => {
            temperature
        } = req.body;

+        // Optimistic Retry: If ALL accounts are rate-limited for this model, reset them to force a fresh check.
+        // If we have some available accounts, we try them first.
+        const modelId = model || 'claude-3-5-sonnet-20241022';
+        if (accountManager.isAllRateLimited(modelId)) {
+            logger.warn(`[Server] All accounts rate-limited for ${modelId}. Resetting state for optimistic retry.`);
+            accountManager.resetAllRateLimits();
+        }
+
        // Validate required fields
        if (!messages || !Array.isArray(messages)) {
            return res.status(400).json({