fix: use CUSTOM_CONNECT_TIMEOUT for gemini too

feat: add grok-4 to openrouter_models.json
This commit is contained in:
Fahad
2025-10-06 23:23:24 +04:00
parent a65485a1e5
commit a33efbde52
5 changed files with 68 additions and 11 deletions

View File

@@ -341,6 +341,25 @@
"temperature_constraint": "fixed",
"description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
"intelligence_score": 8
},
{
"model_name": "x-ai/grok-4",
"aliases": [
"grok-4",
"grok4",
"grok"
],
"context_window": 256000,
"max_output_tokens": 256000,
"supports_extended_thinking": true,
"supports_json_mode": true,
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": true,
"temperature_constraint": "range",
"description": "xAI's Grok 4 via OpenRouter with vision and advanced reasoning",
"intelligence_score": 15
}
]
}

View File

@@ -44,7 +44,7 @@ Regardless of your default configuration, you can specify models per request:
| **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
| **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability |
| **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks |
| **`grok-4-latest`** | X.AI | 256K tokens | Latest flagship model with reasoning, vision | Complex analysis, reasoning tasks |
| **`grok-4`** | X.AI | 256K tokens | Latest flagship Grok model with reasoning, vision | Complex analysis, reasoning tasks |
| **`grok-3`** | X.AI | 131K tokens | Advanced reasoning model | Deep analysis, complex problems |
| **`grok-3-fast`** | X.AI | 131K tokens | Higher performance variant | Fast responses with reasoning |
| **`llama`** (Llama 3.2) | Custom/Local | 128K tokens | Local inference, privacy | On-device analysis, cost-free processing |

View File

@@ -75,7 +75,7 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
- **`o3-mini`**: Balanced speed/quality (200K context)
- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
- **`grok-3`**: GROK-3 advanced reasoning (131K context)
- **`grok-4-latest`**: GROK-4 latest flagship model (256K context)
- **`grok-4`**: GROK-4 flagship model (256K context)
- **Custom models**: via OpenRouter or local APIs
### Thinking Mode Configuration
@@ -108,7 +108,7 @@ OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini
GOOGLE_ALLOWED_MODELS=flash,pro
# X.AI GROK model restrictions
XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4-latest
XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4
# OpenRouter model restrictions (affects models via custom provider)
OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
@@ -129,11 +129,11 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
- `pro` (shorthand for Pro model)
**X.AI GROK Models:**
- `grok-4-latest` (256K context, latest flagship model with reasoning, vision, and structured outputs)
- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
- `grok-3` (131K context, advanced reasoning)
- `grok-3-fast` (131K context, higher performance)
- `grok` (shorthand for grok-4-latest)
- `grok4` (shorthand for grok-4-latest)
- `grok` (shorthand for grok-4)
- `grok4` (shorthand for grok-4)
- `grok3` (shorthand for grok-3)
- `grokfast` (shorthand for grok-3-fast)

View File

@@ -10,6 +10,7 @@ if TYPE_CHECKING:
from google import genai
from google.genai import types
from utils.env import get_env
from utils.image_utils import validate_image
from .base import ModelProvider
@@ -133,6 +134,7 @@ class GeminiModelProvider(ModelProvider):
self._client = None
self._token_counters = {} # Cache for token counting
self._base_url = kwargs.get("base_url", None) # Optional custom endpoint
self._timeout_override = self._resolve_http_timeout()
# ------------------------------------------------------------------
# Capability surface
@@ -146,17 +148,49 @@ class GeminiModelProvider(ModelProvider):
def client(self):
    """Return the Gemini API client, creating it on first access.

    The client is built once and cached on ``self._client``. When a custom
    base URL and/or a timeout override is configured they are forwarded
    through ``types.HttpOptions``; otherwise the client is created with the
    API key only.
    """
    if self._client is None:
        http_options_kwargs: dict[str, object] = {}
        if self._base_url:
            http_options_kwargs["base_url"] = self._base_url
        if self._timeout_override is not None:
            # The override is resolved in seconds, whereas HttpOptions.timeout
            # is an integer number of milliseconds; convert here so a
            # 300-second setting does not become a 300 ms timeout.
            http_options_kwargs["timeout"] = int(round(self._timeout_override * 1000))

        if http_options_kwargs:
            http_options = types.HttpOptions(**http_options_kwargs)
            logger.debug(
                "Initializing Gemini client with options: base_url=%s timeout=%sms",
                http_options_kwargs.get("base_url"),
                http_options_kwargs.get("timeout"),
            )
            self._client = genai.Client(api_key=self.api_key, http_options=http_options)
        else:
            # Use default Google endpoint
            self._client = genai.Client(api_key=self.api_key)
    return self._client
def _resolve_http_timeout(self) -> Optional[float]:
    """Compute timeout override from shared custom timeout environment variables.

    Reads ``CUSTOM_CONNECT_TIMEOUT``, ``CUSTOM_READ_TIMEOUT``,
    ``CUSTOM_WRITE_TIMEOUT`` and ``CUSTOM_POOL_TIMEOUT`` through ``get_env``
    and collapses them into a single value.

    Values that cannot be parsed as a number, are not finite (``nan``,
    ``inf``) or are not strictly positive are ignored with a warning, so an
    unusable timeout is never returned.

    Returns:
        The largest valid configured timeout, in seconds, or ``None`` when
        no variable holds a usable value.
    """
    import math  # local import keeps this method self-contained

    timeouts: list[float] = []
    for env_var in (
        "CUSTOM_CONNECT_TIMEOUT",
        "CUSTOM_READ_TIMEOUT",
        "CUSTOM_WRITE_TIMEOUT",
        "CUSTOM_POOL_TIMEOUT",
    ):
        raw_value = get_env(env_var)
        if not raw_value:
            continue
        try:
            value = float(raw_value)
        except (TypeError, ValueError):
            logger.warning("Invalid %s value '%s'; ignoring.", env_var, raw_value)
            continue
        # float() happily parses "nan", "inf" and negative numbers; none of
        # them is a meaningful timeout, and NaN makes max() order-dependent.
        if not math.isfinite(value) or value <= 0:
            logger.warning("Invalid %s value '%s'; ignoring.", env_var, raw_value)
            continue
        timeouts.append(value)

    if not timeouts:
        return None

    # Use the largest timeout to best approximate long-running requests
    resolved = max(timeouts)
    logger.debug("Using custom Gemini HTTP timeout: %ss", resolved)
    return resolved
# ------------------------------------------------------------------
# Request execution
# ------------------------------------------------------------------

View File

@@ -47,6 +47,7 @@ class TestOpenRouterProvider:
assert provider.validate_model_name("anthropic/claude-3-opus") is True
assert provider.validate_model_name("google/any-model-name") is True
assert provider.validate_model_name("groq/llama-3.1-8b") is True
assert provider.validate_model_name("grok-4") is True
# Unknown models without provider prefix are rejected
assert provider.validate_model_name("gpt-4") is False
@@ -88,6 +89,9 @@ class TestOpenRouterProvider:
assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("haiku") == "anthropic/claude-3.5-haiku"
assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411"
assert provider._resolve_model_name("grok-4") == "x-ai/grok-4"
assert provider._resolve_model_name("grok4") == "x-ai/grok-4"
assert provider._resolve_model_name("grok") == "x-ai/grok-4"
assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"
assert provider._resolve_model_name("r1") == "deepseek/deepseek-r1-0528"