WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading
config.py (12 changed lines)
@@ -23,11 +23,13 @@ __author__ = "Fahad Gilani" # Primary maintainer
 # This should be a stable, high-performance model suitable for code analysis
 GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"
 
-# MAX_CONTEXT_TOKENS: Maximum number of tokens that can be included in a single request
-# This limit includes both the prompt and expected response
-# Gemini Pro models support up to 1M tokens, but practical usage should reserve
-# space for the model's response (typically 50K-100K tokens reserved)
-MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens for Gemini Pro
+# Token allocation for Gemini Pro (1M total capacity)
+# MAX_CONTEXT_TOKENS: Total model capacity
+# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
+# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
+MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens total capacity for Gemini Pro
+MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
+RESPONSE_RESERVE_TOKENS = 200_000 # 200K tokens reserved for response generation
 
 # Temperature defaults for different tool types
 # Temperature controls the randomness/creativity of model responses
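
For context, a minimal sketch of how the new constants might be combined to enforce a request budget before calling the model. The estimate_tokens heuristic and fits_in_budget helper are hypothetical illustrations, not part of this commit; only the three constants come from config.py.

# Hypothetical usage sketch (not part of this commit): keep content within
# MAX_CONTENT_TOKENS so RESPONSE_RESERVE_TOKENS stays free for the model's reply.
from config import MAX_CONTEXT_TOKENS, MAX_CONTENT_TOKENS, RESPONSE_RESERVE_TOKENS

def estimate_tokens(text: str) -> int:
    # Rough heuristic: about 4 characters per token for English text and code.
    return len(text) // 4

def fits_in_budget(prompt: str, files_text: str, history_text: str) -> bool:
    # The allocation is consistent: 800K content + 200K response = 1M capacity.
    assert MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS == MAX_CONTEXT_TOKENS
    used = sum(estimate_tokens(t) for t in (prompt, files_text, history_text))
    return used <= MAX_CONTENT_TOKENS

A caller could check fits_in_budget(...) before assembling the final request and trim files or conversation history when it returns False.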