WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading

This commit is contained in:
Fahad
2025-06-11 13:24:59 +04:00
parent 5a94737516
commit 98eab46abf
13 changed files with 1383 additions and 64 deletions

View File

@@ -23,11 +23,13 @@ __author__ = "Fahad Gilani" # Primary maintainer
# This should be a stable, high-performance model suitable for code analysis
# NOTE(review): the identifier below names a dated "preview" release, which
# may not match the "stable" intent stated above — confirm this is intended.
GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"
# Token allocation for Gemini Pro (1M total capacity).
# The budget is split so that:
#   MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS == MAX_CONTEXT_TOKENS
#
# MAX_CONTEXT_TOKENS: Total model capacity for a single request; this limit
#   covers both the request content and the generated response.
# RESPONSE_RESERVE_TOKENS: Reserved for model response generation.
# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files.
#   Derived from the two values above so the three cannot drift out of sync.
MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens total capacity for Gemini Pro
RESPONSE_RESERVE_TOKENS = 200_000  # 200K tokens reserved for response generation
MAX_CONTENT_TOKENS = MAX_CONTEXT_TOKENS - RESPONSE_RESERVE_TOKENS  # 800K tokens for content (prompts + files + history)
# Temperature defaults for different tool types
# Temperature controls the randomness/creativity of model responses