# feat: Add DISABLED_TOOLS environment variable for selective tool disabling (#127)

Illya Havsiyevych · a355b80afc · 2025-06-23
## Description

This PR adds support for selectively disabling tools via the `DISABLED_TOOLS` environment variable, allowing users to customize which MCP tools are available in their Zen server instance. This enables better control over tool availability for security, performance, or organizational requirements.

## Changes Made

- [x] Added `DISABLED_TOOLS` environment variable support to selectively disable tools
- [x] Implemented tool filtering logic with protection for essential tools (`version`, `listmodels`); a sketch follows this list
- [x] Added validation that warns about unknown tool names and about attempts to disable essential tools
- [x] Updated `.env.example` with DISABLED_TOOLS documentation and examples
- [x] Added a comprehensive test suite (16 tests) covering all edge cases (illustrated below, after the Configuration section)
- [x] No breaking changes: the feature is opt-in and default behavior is unchanged
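
To make the filtering and validation behavior concrete, here is a minimal sketch of the logic described above. It is illustrative only: `filter_tools`, `ALL_TOOLS`, and `ESSENTIAL_TOOLS` are hypothetical names, not the server's actual identifiers.

```python
import logging
import os

logger = logging.getLogger(__name__)

# Tools that must always remain available (hypothetical constant names)
ESSENTIAL_TOOLS = {"version", "listmodels"}

# Every registered tool: the twelve user-facing tools plus the essentials
ALL_TOOLS = ESSENTIAL_TOOLS | {
    "chat", "thinkdeep", "planner", "consensus", "codereview", "precommit",
    "debug", "docgen", "analyze", "refactor", "tracer", "testgen",
}


def filter_tools(env: dict[str, str]) -> set[str]:
    """Return the enabled tool names after applying DISABLED_TOOLS."""
    raw = env.get("DISABLED_TOOLS", "")
    # Comma-separated, whitespace-tolerant, case-insensitive
    disabled = {name.strip().lower() for name in raw.split(",") if name.strip()}

    unknown = disabled - ALL_TOOLS
    if unknown:
        logger.warning("Unknown tools in DISABLED_TOOLS: %s", ", ".join(sorted(unknown)))

    protected = disabled & ESSENTIAL_TOOLS
    if protected:
        logger.warning("Essential tools cannot be disabled: %s", ", ".join(sorted(protected)))

    # Essential tools are always kept; unknown names have no effect
    return {tool for tool in ALL_TOOLS if tool in ESSENTIAL_TOOLS or tool not in disabled}


enabled_tools = filter_tools(dict(os.environ))
```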

## Configuration

Add to `.env` file:
```bash
# Optional: Tool Selection
# Comma-separated list of tools to disable. If not set, all tools are enabled.
# Essential tools (version, listmodels) cannot be disabled.
# Available tools: chat, thinkdeep, planner, consensus, codereview, precommit,
#                  debug, docgen, analyze, refactor, tracer, testgen
# Examples:
# DISABLED_TOOLS=                    # All tools enabled (default)
# DISABLED_TOOLS=debug,tracer        # Disable debug and tracer tools
# DISABLED_TOOLS=planner,consensus   # Disable planning tools
```
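The examples above map directly onto the edge cases the test suite exercises. A few representative tests against the hypothetical `filter_tools` sketch from earlier (illustrative, not the PR's actual test code):

```python
from tool_filter import ALL_TOOLS, ESSENTIAL_TOOLS, filter_tools  # hypothetical module


def test_unset_variable_enables_all_tools():
    assert filter_tools({}) == ALL_TOOLS


def test_listed_tools_are_disabled():
    enabled = filter_tools({"DISABLED_TOOLS": "debug, tracer"})
    assert "debug" not in enabled
    assert "tracer" not in enabled


def test_essential_tools_survive_disable_attempts():
    enabled = filter_tools({"DISABLED_TOOLS": "version,listmodels,chat"})
    assert ESSENTIAL_TOOLS <= enabled
    assert "chat" not in enabled


def test_unknown_names_are_ignored():
    assert filter_tools({"DISABLED_TOOLS": "not_a_tool"}) == ALL_TOOLS
```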
## `my-pal-mcp-server/.env.example`

```bash
# Zen MCP Server Environment Configuration
# Copy this file to .env and fill in your values

# API Keys - At least one is required
#
# IMPORTANT: Choose ONE approach:
# - Native APIs (Gemini/OpenAI/XAI) for direct access
# - DIAL for unified enterprise access
# - OpenRouter for unified cloud access
# Having multiple unified providers creates ambiguity about which serves each model.
#
# Option 1: Use native APIs (recommended for direct access)
# Get your Gemini API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your_gemini_api_key_here

# Get your OpenAI API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=your_openai_api_key_here

# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here

# Get your DIAL API key and configure host URL
# DIAL provides unified access to multiple AI models through a single API
DIAL_API_KEY=your_dial_api_key_here
# DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended)
# DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests

# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
OPENROUTER_API_KEY=your_openrouter_api_key_here

# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
# CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
# CUSTOM_MODEL_NAME=llama3.2 # Default model name

# Optional: Default model to use
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
# 'grok', 'opus-4', 'sonnet-4', or any DIAL model if DIAL is configured
# When set to 'auto', Claude will select the best model for each task
# Defaults to 'auto' if not specified
DEFAULT_MODEL=auto

# Optional: Default thinking mode for ThinkDeep tool
# NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro)
# Flash models (2.0) will use system prompt engineering instead
# Token consumption per mode:
# minimal: 128 tokens - Quick analysis, fastest response
# low: 2,048 tokens - Light reasoning tasks
# medium: 8,192 tokens - Balanced reasoning (good for most cases)
# high: 16,384 tokens - Complex analysis (recommended for thinkdeep)
# max: 32,768 tokens - Maximum reasoning depth, slowest but most thorough
# Defaults to 'high' if not specified
DEFAULT_THINKING_MODE_THINKDEEP=high

# Optional: Model usage restrictions
# Limit which models can be used from each provider for cost control, compliance, or standardization
# Format: Comma-separated list of allowed model names (case-insensitive, whitespace tolerant)
# Empty or unset = all models allowed (default behavior)
# If you want to disable a provider entirely, don't set its API key
#
# Supported OpenAI models:
# - o3 (200K context, high reasoning)
# - o3-mini (200K context, balanced)
# - o4-mini (200K context, latest balanced, temperature=1.0 only)
# - o4-mini-high (200K context, enhanced reasoning, temperature=1.0 only)
# - mini (shorthand for o4-mini)
#
# Supported Google/Gemini models:
# - gemini-2.5-flash (1M context, fast, supports thinking)
# - gemini-2.5-pro (1M context, powerful, supports thinking)
# - flash (shorthand for gemini-2.5-flash)
# - pro (shorthand for gemini-2.5-pro)
#
# Supported X.AI GROK models:
# - grok-3 (131K context, advanced reasoning)
# - grok-3-fast (131K context, higher performance but more expensive)
# - grok (shorthand for grok-3)
# - grok3 (shorthand for grok-3)
# - grokfast (shorthand for grok-3-fast)
#
# Supported DIAL models (when available in your DIAL deployment):
# - o3-2025-04-16 (200K context, latest O3 release)
# - o4-mini-2025-04-16 (200K context, latest O4 mini)
# - o3 (shorthand for o3-2025-04-16)
# - o4-mini (shorthand for o4-mini-2025-04-16)
# - anthropic.claude-sonnet-4-20250514-v1:0 (200K context, Claude 4 Sonnet)
# - anthropic.claude-sonnet-4-20250514-v1:0-with-thinking (200K context, Claude 4 Sonnet with thinking mode)
# - anthropic.claude-opus-4-20250514-v1:0 (200K context, Claude 4 Opus)
# - anthropic.claude-opus-4-20250514-v1:0-with-thinking (200K context, Claude 4 Opus with thinking mode)
# - sonnet-4 (shorthand for Claude 4 Sonnet)
# - sonnet-4-thinking (shorthand for Claude 4 Sonnet with thinking)
# - opus-4 (shorthand for Claude 4 Opus)
# - opus-4-thinking (shorthand for Claude 4 Opus with thinking)
# - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search)
# - gemini-2.5-pro-preview-05-06 (1M context, latest preview)
# - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview)
# - gemini-2.5-pro (shorthand for gemini-2.5-pro-preview-05-06)
# - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search)
# - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20)
#
# Examples:
# OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
# GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
# XAI_ALLOWED_MODELS=grok-3 # Only allow standard GROK (not fast variant)
# OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
# GOOGLE_ALLOWED_MODELS=flash,pro # Allow both Gemini models
# XAI_ALLOWED_MODELS=grok,grok-3-fast # Allow both GROK variants
# DIAL_ALLOWED_MODELS=o3,o4-mini # Only allow O3/O4 models via DIAL
# DIAL_ALLOWED_MODELS=opus-4,sonnet-4 # Only Claude 4 models (without thinking)
# DIAL_ALLOWED_MODELS=opus-4-thinking,sonnet-4-thinking # Only Claude 4 with thinking mode
# DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash # Only Gemini 2.5 models via DIAL
#
# Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models
# OPENAI_ALLOWED_MODELS=
# GOOGLE_ALLOWED_MODELS=
# XAI_ALLOWED_MODELS=
# DIAL_ALLOWED_MODELS=

# Optional: Custom model configuration file path
# Override the default location of custom_models.json
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json

# Note: Conversations are stored in memory during the session
# Optional: Conversation timeout (hours)
# How long AI-to-AI conversation threads persist before expiring
# Longer timeouts use more memory but allow resuming conversations later
# Defaults to 3 hours if not specified
CONVERSATION_TIMEOUT_HOURS=3

# Optional: Max conversation turns
# Maximum number of turns allowed in an AI-to-AI conversation thread
# Each exchange (Claude asks, Gemini responds) counts as 2 turns
# So 20 turns = 10 exchanges. Defaults to 20 if not specified
MAX_CONVERSATION_TURNS=20

# Optional: Logging level (DEBUG, INFO, WARNING, ERROR)
# DEBUG: Shows detailed operational messages for troubleshooting (default)
# INFO: Shows general operational messages
# WARNING: Shows only warnings and errors
# ERROR: Shows only errors
LOG_LEVEL=DEBUG

# Optional: Tool Selection
# Comma-separated list of tools to disable. If not set, all tools are enabled.
# Essential tools (version, listmodels) cannot be disabled.
# Available tools: chat, thinkdeep, planner, consensus, codereview, precommit,
#                  debug, docgen, analyze, refactor, tracer, testgen
# Examples:
# DISABLED_TOOLS=                    # All tools enabled (default)
# DISABLED_TOOLS=debug,tracer        # Disable debug and tracer tools
# DISABLED_TOOLS=planner,consensus   # Disable planning tools
```
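
As a quick sanity check after copying this file to `.env`, something along these lines confirms the configuration loads. This is a minimal sketch assuming `python-dotenv` is installed; the server's own startup validation may work differently.

```python
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads .env from the current working directory

# Per the comments above, at least one provider must be configured
PROVIDER_VARS = (
    "GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY",
    "DIAL_API_KEY", "OPENROUTER_API_KEY", "CUSTOM_API_URL",
)
if not any(os.getenv(var) for var in PROVIDER_VARS):
    raise SystemExit("No provider configured: set at least one API key in .env")

print("Disabled tools:", os.getenv("DISABLED_TOOLS") or "(none, all tools enabled)")
```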