Selective fixes from PR #35: Model-specific rate limits & robustness improvements (#37)

* feat: apply local user changes and fixes

* ;D

* Implement OpenAI support, model-specific rate limiting, and robustness fixes

* docs: update pr title

* feat: ensure unique openai models endpoint

* fix: startup banner alignment and removed duplicates

* feat: add model fallback system with --fallback flag

* fix: accounts cli hanging after completion

* feat: add exit option to accounts cli menu

* fix: remove circular dependency warning for fallback flag

* feat: show active modes in banner and hide their flags

* Remove OpenAI compatibility and fallback features from PR #35

Cherry-picked selective fixes from PR #35 while removing:
- OpenAI-compatible API endpoints (/openai/v1/*)
- Model fallback system (fallback-config.js)
- Thinking block skip for Gemini models
- Unnecessary files (pullrequest.md, test-fix.js, test-openai.js)

Retained improvements:
- Network error handling with retry logic
- Model-specific rate limiting
- Enhanced health check with quota info
- CLI fixes (exit option, process.exit)
- Startup banner alignment (debug mode only)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* banner alignment fix

* Refactor: Model-specific rate limits and cleanup deprecated code

- Remove global rate limit fields (isRateLimited, rateLimitResetTime) in favor of model-specific limits (modelRateLimits[modelId])
- Remove deprecated wrapper functions (is429Error, isAuthInvalidError) from handlers
- Filter fetchAvailableModels to only return Claude and Gemini models
- Fix getCurrentStickyAccount() to pass model param after waiting
- Update /account-limits endpoint to show model-specific limits
- Remove multi-account OAuth flow to avoid state mismatch errors

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* fix: show (x/y) limited status in account-limits table

- Status is now "ok" only when all models are available
- Shows "(x/y) limited" when x out of y models are exhausted
- Provides better visibility into partial rate limiting

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* docs: update CLAUDE.md with model-specific rate limiting

- Document modelRateLimits[modelId] for per-model rate tracking
- Add isNetworkError() helper to utilities section

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: M1noa <minoa@minoa.cat>
Co-authored-by: Minoa <altgithub@minoa.cat>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Badri Narayanan S
2026-01-03 15:33:49 +05:30
committed by GitHub
parent 2d05dd5b62
commit 9c4a712a9a
15 changed files with 474 additions and 194 deletions

View File

@@ -13,28 +13,12 @@ import {
} from '../constants.js';
import { convertGoogleToAnthropic } from '../format/index.js';
import { isRateLimitError, isAuthError } from '../errors.js';
import { formatDuration, sleep } from '../utils/helpers.js';
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
import { logger } from '../utils/logger.js';
import { parseResetTime } from './rate-limit-parser.js';
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
import { parseThinkingSSEResponse } from './sse-parser.js';
/**
 * Legacy alias reporting whether an error is a rate limit error (429 or RESOURCE_EXHAUSTED).
 * The decision is delegated entirely to isRateLimitError.
 * @deprecated Use isRateLimitError from errors.js instead
 * @param {*} error - The error to classify
 * @returns {*} Whatever isRateLimitError returns for the given error
 */
function is429Error(error) {
    const rateLimited = isRateLimitError(error);
    return rateLimited;
}
/**
 * Legacy alias reporting whether an error is an auth-invalid error
 * (credentials need re-authentication). The decision is delegated entirely to isAuthError.
 * @deprecated Use isAuthError from errors.js instead
 * @param {*} error - The error to classify
 * @returns {*} Whatever isAuthError returns for the given error
 */
function isAuthInvalidError(error) {
    const authInvalid = isAuthError(error);
    return authInvalid;
}
/**
* Send a non-streaming request to Cloud Code with multi-account support
* Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks)
@@ -59,7 +43,7 @@ export async function sendMessage(anthropicRequest, accountManager) {
for (let attempt = 0; attempt < maxAttempts; attempt++) {
// Use sticky account selection for cache continuity
const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model);
let account = stickyAccount;
// Handle waiting for sticky account
@@ -67,19 +51,19 @@ export async function sendMessage(anthropicRequest, accountManager) {
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
await sleep(waitMs);
accountManager.clearExpiredLimits();
account = accountManager.getCurrentStickyAccount();
account = accountManager.getCurrentStickyAccount(model);
}
// Handle all accounts rate-limited
if (!account) {
if (accountManager.isAllRateLimited()) {
const allWaitMs = accountManager.getMinWaitTimeMs();
if (accountManager.isAllRateLimited(model)) {
const allWaitMs = accountManager.getMinWaitTimeMs(model);
const resetTime = new Date(Date.now() + allWaitMs).toISOString();
// If wait time is too long (> 2 minutes), throw error immediately
if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
throw new Error(
`RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
);
}
@@ -88,7 +72,7 @@ export async function sendMessage(anthropicRequest, accountManager) {
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
await sleep(allWaitMs);
accountManager.clearExpiredLimits();
account = accountManager.pickNext();
account = accountManager.pickNext(model);
}
if (!account) {
@@ -163,7 +147,7 @@ export async function sendMessage(anthropicRequest, accountManager) {
return convertGoogleToAnthropic(data, anthropicRequest.model);
} catch (endpointError) {
if (is429Error(endpointError)) {
if (isRateLimitError(endpointError)) {
throw endpointError; // Re-throw to trigger account switch
}
logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
@@ -176,19 +160,19 @@ export async function sendMessage(anthropicRequest, accountManager) {
// If all endpoints returned 429, mark account as rate-limited
if (lastError.is429) {
logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
accountManager.markRateLimited(account.email, lastError.resetMs);
accountManager.markRateLimited(account.email, lastError.resetMs, model);
throw new Error(`Rate limited: ${lastError.errorText}`);
}
throw lastError;
}
} catch (error) {
if (is429Error(error)) {
if (isRateLimitError(error)) {
// Rate limited - already marked, continue to next account
logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
continue;
}
if (isAuthInvalidError(error)) {
if (isAuthError(error)) {
// Auth invalid - already marked, continue to next account
logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
continue;
@@ -197,10 +181,17 @@ export async function sendMessage(anthropicRequest, accountManager) {
// UNLESS it's a 5xx server error, in which case we treat it as a "soft" failure for this account and try the next one
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
accountManager.pickNext(); // Force advance to next account
accountManager.pickNext(model); // Force advance to next account
continue;
}
if (isNetworkError(error)) {
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
await sleep(1000); // Brief pause before retry
accountManager.pickNext(model); // Advance to next account
continue;
}
throw error;
}
}