# Zen MCP Server Environment Configuration
# Copy this file to .env and fill in your values

# API Keys - At least one is required
#
# IMPORTANT: Choose ONE approach:
#   - Native APIs (Gemini/OpenAI/XAI) for direct access
#   - DIAL for unified enterprise access
#   - OpenRouter for unified cloud access
# Having multiple unified providers creates ambiguity about which serves each model.

# Option 1: Use native APIs (recommended for direct access)
# Get your Gemini API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your_gemini_api_key_here

# Get your OpenAI API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=your_openai_api_key_here

# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here

# Get your DIAL API key and configure host URL
# DIAL provides unified access to multiple AI models through a single API
DIAL_API_KEY=your_dial_api_key_here
# DIAL_API_HOST=https://core.dialx.ai      # Optional: Base URL without /openai suffix (auto-appended)
# DIAL_API_VERSION=2025-01-01-preview      # Optional: API version header for DIAL requests

# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
OPENROUTER_API_KEY=your_openrouter_api_key_here

# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
# CUSTOM_API_URL=http://localhost:11434/v1  # Ollama example
# CUSTOM_API_KEY=                           # Empty for Ollama (no auth needed)
# CUSTOM_MODEL_NAME=llama3.2                # Default model name
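#
# Other OpenAI-compatible local servers follow the same pattern. The ports
# shown below are each tool's usual defaults (an assumption - adjust to your setup):
# CUSTOM_API_URL=http://localhost:8000/v1   # vLLM example (default port)
# CUSTOM_API_URL=http://localhost:1234/v1   # LM Studio example (default port)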
# Optional: Default model to use
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini',
#          'o4-mini-high', 'grok', 'opus-4', 'sonnet-4', or any DIAL model if DIAL is configured
# When set to 'auto', Claude will select the best model for each task
# Defaults to 'auto' if not specified
DEFAULT_MODEL=auto

# Optional: Default thinking mode for ThinkDeep tool
# NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro)
#       Flash models (2.0) will use system prompt engineering instead
# Token consumption per mode:
#   minimal: 128 tokens    - Quick analysis, fastest response
#   low:     2,048 tokens  - Light reasoning tasks
#   medium:  8,192 tokens  - Balanced reasoning (good for most cases)
#   high:    16,384 tokens - Complex analysis (recommended for thinkdeep)
#   max:     32,768 tokens - Maximum reasoning depth, slowest but most thorough
# Defaults to 'high' if not specified
DEFAULT_THINKING_MODE_THINKDEEP=high

# Optional: Model usage restrictions
# Limit which models can be used from each provider for cost control, compliance, or standardization
# Format: Comma-separated list of allowed model names (case-insensitive, whitespace tolerant)
# Empty or unset = all models allowed (default behavior)
# If you want to disable a provider entirely, don't set its API key
#
# Supported OpenAI models:
#   - o3 (200K context, high reasoning)
#   - o3-mini (200K context, balanced)
#   - o4-mini (200K context, latest balanced, temperature=1.0 only)
#   - o4-mini-high (200K context, enhanced reasoning, temperature=1.0 only)
#   - mini (shorthand for o4-mini)
#
# Supported Google/Gemini models:
#   - gemini-2.5-flash (1M context, fast, supports thinking)
#   - gemini-2.5-pro (1M context, powerful, supports thinking)
#   - flash (shorthand for gemini-2.5-flash)
#   - pro (shorthand for gemini-2.5-pro)
#
# Supported X.AI GROK models:
#   - grok-3 (131K context, advanced reasoning)
#   - grok-3-fast (131K context, higher performance but more expensive)
#   - grok (shorthand for grok-3)
#   - grok3 (shorthand for grok-3)
#   - grokfast (shorthand for grok-3-fast)
#
# Supported DIAL models (when available in your DIAL deployment):
#   - o3-2025-04-16 (200K context, latest O3 release)
#   - o4-mini-2025-04-16 (200K context, latest O4 mini)
#   - o3 (shorthand for o3-2025-04-16)
#   - o4-mini (shorthand for o4-mini-2025-04-16)
#   - anthropic.claude-sonnet-4-20250514-v1:0 (200K context, Claude 4 Sonnet)
#   - anthropic.claude-sonnet-4-20250514-v1:0-with-thinking (200K context, Claude 4 Sonnet with thinking mode)
#   - anthropic.claude-opus-4-20250514-v1:0 (200K context, Claude 4 Opus)
#   - anthropic.claude-opus-4-20250514-v1:0-with-thinking (200K context, Claude 4 Opus with thinking mode)
#   - sonnet-4 (shorthand for Claude 4 Sonnet)
#   - sonnet-4-thinking (shorthand for Claude 4 Sonnet with thinking)
#   - opus-4 (shorthand for Claude 4 Opus)
#   - opus-4-thinking (shorthand for Claude 4 Opus with thinking)
#   - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
#   - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
#   - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
#   - gemini-2.5-pro (shorthand for gemini-2.5-pro-preview-05-06)
#   - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
#   - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
#
# Examples:
#   OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini  # Only allow mini models (cost control)
#   GOOGLE_ALLOWED_MODELS=flash                 # Only allow Flash (fast responses)
#   XAI_ALLOWED_MODELS=grok-3                   # Only allow standard GROK (not fast variant)
#   OPENAI_ALLOWED_MODELS=o4-mini               # Single model standardization
#   GOOGLE_ALLOWED_MODELS=flash,pro             # Allow both Gemini models
#   XAI_ALLOWED_MODELS=grok,grok-3-fast         # Allow both GROK variants
#   DIAL_ALLOWED_MODELS=o3,o4-mini              # Only allow O3/O4 models via DIAL
#   DIAL_ALLOWED_MODELS=opus-4,sonnet-4         # Only Claude 4 models (without thinking)
#   DIAL_ALLOWED_MODELS=opus-4-thinking,sonnet-4-thinking  # Only Claude 4 with thinking mode
#   DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash    # Only Gemini 2.5 models via DIAL
#
# Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
# OPENAI_ALLOWED_MODELS=
# GOOGLE_ALLOWED_MODELS=
# XAI_ALLOWED_MODELS=
# DIAL_ALLOWED_MODELS=

# Optional: Custom model configuration file path
# Override the default location of custom_models.json
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json

# Note: Conversations are stored in memory during the session

# Optional: Conversation timeout (hours)
# How long AI-to-AI conversation threads persist before expiring
# Longer timeouts use more memory but allow resuming conversations later
# Defaults to 3 hours if not specified
CONVERSATION_TIMEOUT_HOURS=3

# Optional: Max conversation turns
# Maximum number of turns allowed in an AI-to-AI conversation thread
# Each exchange (Claude asks, Gemini responds) counts as 2 turns,
# so 20 turns = 10 exchanges. Defaults to 20 if not specified
MAX_CONVERSATION_TURNS=20

# Optional: Logging level (DEBUG, INFO, WARNING, ERROR)
#   DEBUG:   Shows detailed operational messages for troubleshooting (default)
#   INFO:    Shows general operational messages
#   WARNING: Shows only warnings and errors
#   ERROR:   Shows only errors
LOG_LEVEL=DEBUG
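
# Example: minimal working setup with a single provider (illustrative only;
# any one API key from the options above is enough to get started):
#   GEMINI_API_KEY=your_gemini_api_key_here
#   DEFAULT_MODEL=auto
#   LOG_LEVEL=INFO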