WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading

2025-06-11 13:24:59 +04:00
parent 5a94737516
commit 98eab46abf
13 changed files with 1383 additions and 64 deletions
--- a/config.py
+++ b/config.py
@@ -23,11 +23,13 @@ __author__ = "Fahad Gilani"  # Primary maintainer
 # This should be a stable, high-performance model suitable for code analysis
 GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"

-# MAX_CONTEXT_TOKENS: Maximum number of tokens that can be included in a single request
-# This limit includes both the prompt and expected response
-# Gemini Pro models support up to 1M tokens, but practical usage should reserve
-# space for the model's response (typically 50K-100K tokens reserved)
-MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
+# Token allocation for Gemini Pro (1M total capacity)
+# MAX_CONTEXT_TOKENS: Total model capacity (MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS)
+# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
+# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
+MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens total capacity for Gemini Pro
+MAX_CONTENT_TOKENS = 800_000    # 800K tokens for content (prompts + files + history)
+RESPONSE_RESERVE_TOKENS = 200_000  # 200K tokens reserved for response generation

 # Temperature defaults for different tool types
 # Temperature controls the randomness/creativity of model responses