fix: use CUSTOM_CONNECT_TIMEOUT for gemini too
feat: add grok-4 to openrouter_models.json
@@ -341,6 +341,25 @@
       "temperature_constraint": "fixed",
       "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
       "intelligence_score": 8
     },
+    {
+      "model_name": "x-ai/grok-4",
+      "aliases": [
+        "grok-4",
+        "grok4",
+        "grok"
+      ],
+      "context_window": 256000,
+      "max_output_tokens": 256000,
+      "supports_extended_thinking": true,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "supports_images": true,
+      "max_image_size_mb": 20.0,
+      "supports_temperature": true,
+      "temperature_constraint": "range",
+      "description": "xAI's Grok 4 via OpenRouter with vision and advanced reasoning",
+      "intelligence_score": 15
+    }
   ]
 }
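
For context, the new entry's `aliases` array is what lets shorthand names like `grok` resolve to the canonical OpenRouter id. Below is a minimal sketch of that lookup; the top-level `models` key and the `resolve_alias` helper are assumptions, not the repo's API — the real path is the provider's `_resolve_model_name`, pinned down by the tests at the end of this diff.

```python
# Copied from the hunk above, trimmed to the fields alias resolution needs.
# The MODELS shape and resolve_alias helper are hypothetical; the repo's
# real implementation is _resolve_model_name (exercised in the tests below).
MODELS = [
    {
        "model_name": "x-ai/grok-4",
        "aliases": ["grok-4", "grok4", "grok"],
    },
]

def resolve_alias(name: str) -> str:
    """Map a canonical id or any of its aliases to the OpenRouter id."""
    for entry in MODELS:
        if name == entry["model_name"] or name in entry["aliases"]:
            return entry["model_name"]
    return name  # Unknown names pass through unchanged.

assert resolve_alias("grok") == "x-ai/grok-4"
assert resolve_alias("grok4") == "x-ai/grok-4"
assert resolve_alias("x-ai/grok-4") == "x-ai/grok-4"
```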
@@ -44,7 +44,7 @@ Regardless of your default configuration, you can specify models per request:
 | **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
 | **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability |
 | **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks |
-| **`grok-4-latest`** | X.AI | 256K tokens | Latest flagship model with reasoning, vision | Complex analysis, reasoning tasks |
+| **`grok-4`** | X.AI | 256K tokens | Latest flagship Grok model with reasoning, vision | Complex analysis, reasoning tasks |
 | **`grok-3`** | X.AI | 131K tokens | Advanced reasoning model | Deep analysis, complex problems |
 | **`grok-3-fast`** | X.AI | 131K tokens | Higher performance variant | Fast responses with reasoning |
 | **`llama`** (Llama 3.2) | Custom/Local | 128K tokens | Local inference, privacy | On-device analysis, cost-free processing |
@@ -75,7 +75,7 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 - **`o3-mini`**: Balanced speed/quality (200K context)
 - **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
 - **`grok-3`**: GROK-3 advanced reasoning (131K context)
-- **`grok-4-latest`**: GROK-4 latest flagship model (256K context)
+- **`grok-4`**: GROK-4 flagship model (256K context)
 - **Custom models**: via OpenRouter or local APIs

 ### Thinking Mode Configuration
@@ -108,7 +108,7 @@ OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini
 GOOGLE_ALLOWED_MODELS=flash,pro

 # X.AI GROK model restrictions
-XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4-latest
+XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4

 # OpenRouter model restrictions (affects models via custom provider)
 OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
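
For illustration, here is a hedged sketch of how a comma-separated allow-list like `XAI_ALLOWED_MODELS` is typically enforced. `is_model_allowed` is a hypothetical helper, not this repo's actual API, and treating an empty variable as "no restriction" is an assumption.

```python
# Hypothetical sketch: gate model names on a comma-separated allow-list
# such as XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4. An unset or empty
# variable is treated as "no restriction" (an assumption, not verified).
import os

def is_model_allowed(model_name: str, env_var: str = "XAI_ALLOWED_MODELS") -> bool:
    raw = os.environ.get(env_var, "").strip()
    if not raw:
        return True  # No restriction configured: allow everything.
    allowed = {name.strip().lower() for name in raw.split(",") if name.strip()}
    return model_name.lower() in allowed

os.environ["XAI_ALLOWED_MODELS"] = "grok-3,grok-3-fast,grok-4"
assert is_model_allowed("grok-4")
assert not is_model_allowed("grok-4-latest")  # Dropped from the list above.
```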
@@ -129,11 +129,11 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
 - `pro` (shorthand for Pro model)

 **X.AI GROK Models:**
-- `grok-4-latest` (256K context, latest flagship model with reasoning, vision, and structured outputs)
+- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
 - `grok-3` (131K context, advanced reasoning)
 - `grok-3-fast` (131K context, higher performance)
-- `grok` (shorthand for grok-4-latest)
-- `grok4` (shorthand for grok-4-latest)
+- `grok` (shorthand for grok-4)
+- `grok4` (shorthand for grok-4)
 - `grok3` (shorthand for grok-3)
 - `grokfast` (shorthand for grok-3-fast)
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
     from google import genai
     from google.genai import types

+from utils.env import get_env
 from utils.image_utils import validate_image

 from .base import ModelProvider
@@ -133,6 +134,7 @@ class GeminiModelProvider(ModelProvider):
         self._client = None
         self._token_counters = {}  # Cache for token counting
         self._base_url = kwargs.get("base_url", None)  # Optional custom endpoint
+        self._timeout_override = self._resolve_http_timeout()

     # ------------------------------------------------------------------
     # Capability surface
@@ -146,17 +148,49 @@ class GeminiModelProvider(ModelProvider):
     def client(self):
         """Lazy initialization of Gemini client."""
         if self._client is None:
-            # Check if custom base URL is provided
+            http_options_kwargs: dict[str, object] = {}
             if self._base_url:
-                # Use HttpOptions to set custom endpoint
-                http_options = types.HttpOptions(baseUrl=self._base_url)
-                logger.debug(f"Initializing Gemini client with custom endpoint: {self._base_url}")
+                http_options_kwargs["base_url"] = self._base_url
+            if self._timeout_override is not None:
+                http_options_kwargs["timeout"] = self._timeout_override
+
+            if http_options_kwargs:
+                http_options = types.HttpOptions(**http_options_kwargs)
+                logger.debug(
+                    "Initializing Gemini client with options: base_url=%s timeout=%s",
+                    http_options_kwargs.get("base_url"),
+                    http_options_kwargs.get("timeout"),
+                )
                 self._client = genai.Client(api_key=self.api_key, http_options=http_options)
             else:
                 # Use default Google endpoint
                 self._client = genai.Client(api_key=self.api_key)
         return self._client

+    def _resolve_http_timeout(self) -> Optional[float]:
+        """Compute timeout override from shared custom timeout environment variables."""
+
+        timeouts: list[float] = []
+        for env_var in [
+            "CUSTOM_CONNECT_TIMEOUT",
+            "CUSTOM_READ_TIMEOUT",
+            "CUSTOM_WRITE_TIMEOUT",
+            "CUSTOM_POOL_TIMEOUT",
+        ]:
+            raw_value = get_env(env_var)
+            if raw_value:
+                try:
+                    timeouts.append(float(raw_value))
+                except (TypeError, ValueError):
+                    logger.warning("Invalid %s value '%s'; ignoring.", env_var, raw_value)
+
+        if timeouts:
+            # Use the largest timeout to best approximate long-running requests
+            resolved = max(timeouts)
+            logger.debug("Using custom Gemini HTTP timeout: %ss", resolved)
+            return resolved
+
+        return None
+
     # ------------------------------------------------------------------
     # Request execution
     # ------------------------------------------------------------------
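
To make the new timeout behavior concrete, here is a self-contained sketch of the same resolution rule: the largest configured value wins, and malformed values are skipped. It mirrors `_resolve_http_timeout` above, but reads `os.environ` directly instead of the repo's `get_env` helper.

```python
# Standalone sketch of the timeout resolution added above: scan the four
# CUSTOM_* variables, skip unparseable values, and return the maximum.
import os
from typing import Optional

def resolve_http_timeout() -> Optional[float]:
    timeouts: list[float] = []
    for env_var in ("CUSTOM_CONNECT_TIMEOUT", "CUSTOM_READ_TIMEOUT",
                    "CUSTOM_WRITE_TIMEOUT", "CUSTOM_POOL_TIMEOUT"):
        raw = os.environ.get(env_var)
        if raw:
            try:
                timeouts.append(float(raw))
            except ValueError:
                pass  # The real code logs a warning instead.
    return max(timeouts) if timeouts else None

os.environ["CUSTOM_CONNECT_TIMEOUT"] = "30"
os.environ["CUSTOM_READ_TIMEOUT"] = "900"
assert resolve_http_timeout() == 900.0  # Largest value wins.
```

Taking the maximum is a deliberate approximation: the client accepts a single HTTP timeout, so the most generous of the four custom values is the safest stand-in for long-running requests.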
@@ -47,6 +47,7 @@ class TestOpenRouterProvider:
         assert provider.validate_model_name("anthropic/claude-3-opus") is True
         assert provider.validate_model_name("google/any-model-name") is True
         assert provider.validate_model_name("groq/llama-3.1-8b") is True
+        assert provider.validate_model_name("grok-4") is True

         # Unknown models without provider prefix are rejected
         assert provider.validate_model_name("gpt-4") is False
@@ -88,6 +89,9 @@ class TestOpenRouterProvider:
         assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
         assert provider._resolve_model_name("haiku") == "anthropic/claude-3.5-haiku"
         assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411"
+        assert provider._resolve_model_name("grok-4") == "x-ai/grok-4"
+        assert provider._resolve_model_name("grok4") == "x-ai/grok-4"
+        assert provider._resolve_model_name("grok") == "x-ai/grok-4"
         assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"
         assert provider._resolve_model_name("r1") == "deepseek/deepseek-r1-0528"