diff --git a/conf/openrouter_models.json b/conf/openrouter_models.json
index b3f35fc..53fcec6 100644
--- a/conf/openrouter_models.json
+++ b/conf/openrouter_models.json
@@ -341,6 +341,25 @@
       "temperature_constraint": "fixed",
       "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
       "intelligence_score": 8
+    },
+    {
+      "model_name": "x-ai/grok-4",
+      "aliases": [
+        "grok-4",
+        "grok4",
+        "grok"
+      ],
+      "context_window": 256000,
+      "max_output_tokens": 256000,
+      "supports_extended_thinking": true,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "supports_images": true,
+      "max_image_size_mb": 20.0,
+      "supports_temperature": true,
+      "temperature_constraint": "range",
+      "description": "xAI's Grok 4 via OpenRouter with vision and advanced reasoning",
+      "intelligence_score": 15
     }
   ]
 }
diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md
index 000c0f3..58b99d7 100644
--- a/docs/advanced-usage.md
+++ b/docs/advanced-usage.md
@@ -44,7 +44,7 @@ Regardless of your default configuration, you can specify models per request:
 | **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
 | **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability |
 | **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks |
-| **`grok-4-latest`** | X.AI | 256K tokens | Latest flagship model with reasoning, vision | Complex analysis, reasoning tasks |
+| **`grok-4`** | X.AI | 256K tokens | Latest flagship Grok model with reasoning, vision | Complex analysis, reasoning tasks |
 | **`grok-3`** | X.AI | 131K tokens | Advanced reasoning model | Deep analysis, complex problems |
 | **`grok-3-fast`** | X.AI | 131K tokens | Higher performance variant | Fast responses with reasoning |
 | **`llama`** (Llama 3.2) | Custom/Local | 128K tokens | Local inference, privacy | On-device analysis, cost-free processing |
diff --git a/docs/configuration.md b/docs/configuration.md
index 9b48fab..a489ec9 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -75,7 +75,7 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 - **`o3-mini`**: Balanced speed/quality (200K context)
 - **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
 - **`grok-3`**: GROK-3 advanced reasoning (131K context)
-- **`grok-4-latest`**: GROK-4 latest flagship model (256K context)
+- **`grok-4`**: GROK-4 flagship model (256K context)
 - **Custom models**: via OpenRouter or local APIs
 
 ### Thinking Mode Configuration
@@ -108,7 +108,7 @@ OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini
 GOOGLE_ALLOWED_MODELS=flash,pro
 
 # X.AI GROK model restrictions
-XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4-latest
+XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4
 
 # OpenRouter model restrictions (affects models via custom provider)
 OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
@@ -129,11 +129,11 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
 - `pro` (shorthand for Pro model)
 
 **X.AI GROK Models:**
-- `grok-4-latest` (256K context, latest flagship model with reasoning, vision, and structured outputs)
+- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
 - `grok-3` (131K context, advanced reasoning)
 - `grok-3-fast` (131K context, higher performance)
-- `grok` (shorthand for grok-4-latest)
-- `grok4` (shorthand for grok-4-latest)
+- `grok` (shorthand for grok-4)
+- `grok4` (shorthand for grok-4)
 - `grok3` (shorthand for grok-3)
 - `grokfast` (shorthand for grok-3-fast)
 
diff --git a/providers/gemini.py b/providers/gemini.py
index 965faf9..a01df16 100644
--- a/providers/gemini.py
+++ b/providers/gemini.py
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
 from google import genai
 from google.genai import types
 
+from utils.env import get_env
 from utils.image_utils import validate_image
 
 from .base import ModelProvider
@@ -133,6 +134,7 @@ class GeminiModelProvider(ModelProvider):
         self._client = None
         self._token_counters = {}  # Cache for token counting
         self._base_url = kwargs.get("base_url", None)  # Optional custom endpoint
+        self._timeout_override = self._resolve_http_timeout()
 
     # ------------------------------------------------------------------
     # Capability surface
@@ -146,17 +148,49 @@ class GeminiModelProvider(ModelProvider):
     def client(self):
         """Lazy initialization of Gemini client."""
         if self._client is None:
-            # Check if custom base URL is provided
+            http_options_kwargs: dict[str, object] = {}
             if self._base_url:
-                # Use HttpOptions to set custom endpoint
-                http_options = types.HttpOptions(baseUrl=self._base_url)
-                logger.debug(f"Initializing Gemini client with custom endpoint: {self._base_url}")
+                http_options_kwargs["base_url"] = self._base_url
+            if self._timeout_override is not None:
+                http_options_kwargs["timeout"] = self._timeout_override
+
+            if http_options_kwargs:
+                http_options = types.HttpOptions(**http_options_kwargs)
+                logger.debug(
+                    "Initializing Gemini client with options: base_url=%s timeout=%s",
+                    http_options_kwargs.get("base_url"),
+                    http_options_kwargs.get("timeout"),
+                )
                 self._client = genai.Client(api_key=self.api_key, http_options=http_options)
             else:
-                # Use default Google endpoint
                 self._client = genai.Client(api_key=self.api_key)
         return self._client
 
+    def _resolve_http_timeout(self) -> Optional[float]:
+        """Compute timeout override from shared custom timeout environment variables."""
+
+        timeouts: list[float] = []
+        for env_var in [
+            "CUSTOM_CONNECT_TIMEOUT",
+            "CUSTOM_READ_TIMEOUT",
+            "CUSTOM_WRITE_TIMEOUT",
+            "CUSTOM_POOL_TIMEOUT",
+        ]:
+            raw_value = get_env(env_var)
+            if raw_value:
+                try:
+                    timeouts.append(float(raw_value))
+                except (TypeError, ValueError):
+                    logger.warning("Invalid %s value '%s'; ignoring.", env_var, raw_value)
+
+        if timeouts:
+            # Use the largest timeout to best approximate long-running requests
+            resolved = max(timeouts)
+            logger.debug("Using custom Gemini HTTP timeout: %ss", resolved)
+            return resolved
+
+        return None
+
     # ------------------------------------------------------------------
     # Request execution
     # ------------------------------------------------------------------
diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py
index 4c57f81..3b83be0 100644
--- a/tests/test_openrouter_provider.py
+++ b/tests/test_openrouter_provider.py
@@ -47,6 +47,7 @@ class TestOpenRouterProvider:
         assert provider.validate_model_name("anthropic/claude-3-opus") is True
         assert provider.validate_model_name("google/any-model-name") is True
         assert provider.validate_model_name("groq/llama-3.1-8b") is True
+        assert provider.validate_model_name("grok-4") is True
 
         # Unknown models without provider prefix are rejected
         assert provider.validate_model_name("gpt-4") is False
@@ -88,6 +89,9 @@ class TestOpenRouterProvider:
         assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
         assert provider._resolve_model_name("haiku") == "anthropic/claude-3.5-haiku"
         assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411"
+        assert provider._resolve_model_name("grok-4") == "x-ai/grok-4"
+        assert provider._resolve_model_name("grok4") == "x-ai/grok-4"
+        assert provider._resolve_model_name("grok") == "x-ai/grok-4"
         assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"
         assert provider._resolve_model_name("r1") == "deepseek/deepseek-r1-0528"
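
Reviewer note (not part of the patch): a minimal standalone sketch of the timeout resolution that _resolve_http_timeout introduces above, assuming plain os.environ in place of the repo's utils.env.get_env helper; the environment variable names mirror the patch, everything else here is illustrative only.

    # Standalone sketch: mirrors the max-of-all-timeouts logic in the patch.
    # Assumes os.environ directly; the real provider reads via utils.env.get_env
    # and logs a warning for unparseable values instead of silently skipping.
    import os
    from typing import Optional

    TIMEOUT_ENV_VARS = [
        "CUSTOM_CONNECT_TIMEOUT",
        "CUSTOM_READ_TIMEOUT",
        "CUSTOM_WRITE_TIMEOUT",
        "CUSTOM_POOL_TIMEOUT",
    ]

    def resolve_http_timeout() -> Optional[float]:
        """Return the largest parseable CUSTOM_*_TIMEOUT value, or None."""
        timeouts: list[float] = []
        for env_var in TIMEOUT_ENV_VARS:
            raw_value = os.environ.get(env_var)
            if raw_value:
                try:
                    timeouts.append(float(raw_value))
                except ValueError:
                    pass  # provider logs a warning here and skips the value
        return max(timeouts) if timeouts else None

    if __name__ == "__main__":
        os.environ["CUSTOM_CONNECT_TIMEOUT"] = "30"
        os.environ["CUSTOM_READ_TIMEOUT"] = "900"
        print(resolve_http_timeout())  # -> 900.0: the largest value wins

Run directly, this prints 900.0: when several CUSTOM_*_TIMEOUT values are set, the largest wins, which matches the single timeout knob the patch passes through to types.HttpOptions.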