Merge branch 'BeehiveInnovations:main' into feat-local_support_with_UTF-8_encoding-update

This commit is contained in:
OhMyApps
2025-06-23 12:51:56 +02:00
committed by GitHub
25 changed files with 1185 additions and 220 deletions

@@ -3,8 +3,11 @@
# API Keys - At least one is required
#
-# IMPORTANT: Use EITHER OpenRouter OR native APIs (Gemini/OpenAI), not both!
-# Having both creates ambiguity about which provider serves each model.
+# IMPORTANT: Choose ONE approach:
+# - Native APIs (Gemini/OpenAI/XAI) for direct access
+# - DIAL for unified enterprise access
+# - OpenRouter for unified cloud access
+# Having multiple unified providers creates ambiguity about which serves each model.
#
# Option 1: Use native APIs (recommended for direct access)
# Get your Gemini API key from: https://makersuite.google.com/app/apikey
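The three mutually exclusive approaches can be illustrated with a minimal .env that uses only the native APIs and leaves both unified providers unset (all values are placeholders; `GEMINI_API_KEY` is an assumed variable name, as the Gemini line itself is outside this hunk):

```shell
# Option 1 only: native APIs, unified providers left commented out
GEMINI_API_KEY=your_gemini_api_key_here
OPENAI_API_KEY=your_openai_api_key_here
XAI_API_KEY=your_xai_api_key_here
# OPENROUTER_API_KEY=   # leave unset when using native APIs
# DIAL_API_KEY=         # leave unset when using native APIs
```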
@@ -16,6 +19,12 @@ OPENAI_API_KEY=your_openai_api_key_here
# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here
+# Get your DIAL API key and configure host URL
+# DIAL provides unified access to multiple AI models through a single API
+DIAL_API_KEY=your_dial_api_key_here
+# DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended)
+# DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests
# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
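The DIAL settings above leave the `/openai` suffix handling implicit; a minimal sketch of how the base URL and headers might be assembled, assuming the suffix is appended only when absent (function names and the exact header names are illustrative assumptions, not taken from this repo):

```python
def build_dial_url(host: str) -> str:
    """Append the /openai suffix unless the host already ends with it."""
    host = host.rstrip("/")
    return host if host.endswith("/openai") else host + "/openai"


def dial_headers(api_key: str, api_version: str = "2025-01-01-preview") -> dict:
    """Assumed header shape: an API-key header plus the optional version value."""
    return {"Api-Key": api_key, "Api-Version": api_version}


# Example: the commented default host from the .env resolves to .../openai
base = build_dial_url("https://core.dialx.ai")
```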
@@ -27,7 +36,8 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
# CUSTOM_MODEL_NAME=llama3.2 # Default model name
# Optional: Default model to use
-# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high' etc
+# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
+# 'grok', 'opus-4', 'sonnet-4', or any DIAL model if DIAL is configured
# When set to 'auto', Claude will select the best model for each task
# Defaults to 'auto' if not specified
DEFAULT_MODEL=auto
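The fallback rule described above ("defaults to 'auto' if not specified") can be sketched as follows; the function name is illustrative, not the repo's:

```python
import os


def resolve_default_model(env=None) -> str:
    """Fall back to 'auto' when DEFAULT_MODEL is unset or blank."""
    env = os.environ if env is None else env
    value = env.get("DEFAULT_MODEL", "").strip()
    return value if value else "auto"
```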
@@ -70,6 +80,26 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
# - grok3 (shorthand for grok-3)
# - grokfast (shorthand for grok-3-fast)
#
+# Supported DIAL models (when available in your DIAL deployment):
+# - o3-2025-04-16 (200K context, latest O3 release)
+# - o4-mini-2025-04-16 (200K context, latest O4 mini)
+# - o3 (shorthand for o3-2025-04-16)
+# - o4-mini (shorthand for o4-mini-2025-04-16)
+# - anthropic.claude-sonnet-4-20250514-v1:0 (200K context, Claude 4 Sonnet)
+# - anthropic.claude-sonnet-4-20250514-v1:0-with-thinking (200K context, Claude 4 Sonnet with thinking mode)
+# - anthropic.claude-opus-4-20250514-v1:0 (200K context, Claude 4 Opus)
+# - anthropic.claude-opus-4-20250514-v1:0-with-thinking (200K context, Claude 4 Opus with thinking mode)
+# - sonnet-4 (shorthand for Claude 4 Sonnet)
+# - sonnet-4-thinking (shorthand for Claude 4 Sonnet with thinking)
+# - opus-4 (shorthand for Claude 4 Opus)
+# - opus-4-thinking (shorthand for Claude 4 Opus with thinking)
+# - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
+# - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
+# - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
+# - gemini-2.5-pro (shorthand for gemini-2.5-pro-preview-05-06)
+# - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
+# - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
+#
# Examples:
# OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
# GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
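The shorthand-to-deployment mapping in the DIAL model list above amounts to a simple alias table; a sketch built directly from that list (the table name and function are hypothetical):

```python
# Alias table transcribed from the DIAL model list in this .env.example
DIAL_ALIASES = {
    "o3": "o3-2025-04-16",
    "o4-mini": "o4-mini-2025-04-16",
    "sonnet-4": "anthropic.claude-sonnet-4-20250514-v1:0",
    "sonnet-4-thinking": "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
    "opus-4": "anthropic.claude-opus-4-20250514-v1:0",
    "opus-4-thinking": "anthropic.claude-opus-4-20250514-v1:0-with-thinking",
    "gemini-2.5-pro": "gemini-2.5-pro-preview-05-06",
    "gemini-2.5-pro-search": "gemini-2.5-pro-preview-03-25-google-search",
    "gemini-2.5-flash": "gemini-2.5-flash-preview-05-20",
}


def resolve_model(name: str) -> str:
    """Map a shorthand to its full deployment ID; pass unknown names through."""
    return DIAL_ALIASES.get(name, name)
```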
@@ -77,21 +107,26 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
# OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
# GOOGLE_ALLOWED_MODELS=flash,pro # Allow both Gemini models
# XAI_ALLOWED_MODELS=grok,grok-3-fast # Allow both GROK variants
+# DIAL_ALLOWED_MODELS=o3,o4-mini # Only allow O3/O4 models via DIAL
+# DIAL_ALLOWED_MODELS=opus-4,sonnet-4 # Only Claude 4 models (without thinking)
+# DIAL_ALLOWED_MODELS=opus-4-thinking,sonnet-4-thinking # Only Claude 4 with thinking mode
+# DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash # Only Gemini 2.5 models via DIAL
#
# Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
# OPENAI_ALLOWED_MODELS=
# GOOGLE_ALLOWED_MODELS=
# XAI_ALLOWED_MODELS=
+# DIAL_ALLOWED_MODELS=
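The `*_ALLOWED_MODELS` restrictions above, which apply even in 'auto' mode, boil down to a membership filter. A sketch under stated assumptions (comma-separated values, case-insensitive matching, empty/unset means unrestricted); the parsing details are mine, not confirmed by this repo:

```python
import os


def parse_allowed(var: str, env=None):
    """Return the allowed-model set, or None when the variable is unset/empty."""
    env = os.environ if env is None else env
    raw = env.get(var, "").strip()
    if not raw:
        return None  # no restriction configured
    return {name.strip().lower() for name in raw.split(",") if name.strip()}


def is_allowed(model: str, allowed) -> bool:
    """With no restriction every model passes; in 'auto' mode Claude would
    pick only from this set."""
    return allowed is None or model.lower() in allowed
```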
# Optional: Custom model configuration file path
# Override the default location of custom_models.json
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
-# Note: Redis is no longer used - conversations are stored in memory
+# Note: Conversations are stored in memory during the session
# Optional: Conversation timeout (hours)
# How long AI-to-AI conversation threads persist before expiring
-# Longer timeouts use more Redis memory but allow resuming conversations later
+# Longer timeouts use more memory but allow resuming conversations later
# Defaults to 3 hours if not specified
CONVERSATION_TIMEOUT_HOURS=3
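The timeout above controls when an in-memory conversation thread expires; a minimal sketch of the expiry check, with the function names and thread bookkeeping assumed for illustration:

```python
import os
from datetime import datetime, timedelta, timezone


def conversation_deadline(created_at: datetime, env=None) -> datetime:
    """A thread expires CONVERSATION_TIMEOUT_HOURS after creation (default 3)."""
    env = os.environ if env is None else env
    hours = float(env.get("CONVERSATION_TIMEOUT_HOURS", "3"))
    return created_at + timedelta(hours=hours)


def is_expired(created_at: datetime, now: datetime, env=None) -> bool:
    return now >= conversation_deadline(created_at, env)
```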