Use consistent terminology: rename max_tokens to context_window

Fahad
2025-06-13 09:06:12 +04:00
parent e2762c4ed0
commit b16f85979b
13 changed files with 38 additions and 52 deletions

View File

@@ -47,13 +47,6 @@ MODEL_CAPABILITIES_DESC = {
# - "o3" → "openai/gpt-4o" # - "o3" → "openai/gpt-4o"
# - "o3-mini" → "openai/gpt-4o-mini" # - "o3-mini" → "openai/gpt-4o-mini"
# Token allocation for Gemini Pro (1M total capacity)
# MAX_CONTEXT_TOKENS: Total model capacity
# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens total capacity for Gemini Pro
MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
RESPONSE_RESERVE_TOKENS = 200_000 # 200K tokens reserved for response generation
# Temperature defaults for different tool types # Temperature defaults for different tool types
# Temperature controls the randomness/creativity of model responses # Temperature controls the randomness/creativity of model responses

View File

@@ -105,7 +105,7 @@ class ModelCapabilities:
     provider: ProviderType
     model_name: str
     friendly_name: str  # Human-friendly name like "Gemini" or "OpenAI"
-    max_tokens: int
+    context_window: int  # Total context window size in tokens
     supports_extended_thinking: bool = False
     supports_system_prompts: bool = True
     supports_streaming: bool = True
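
Note: for readers skimming the rename, a minimal standalone sketch of the new field in use. The dataclass below is trimmed and renamed so it is not mistaken for the project's full ModelCapabilities definition; the values are illustrative only.

from dataclasses import dataclass


@dataclass
class ModelCapabilitiesSketch:
    model_name: str
    friendly_name: str
    context_window: int  # total context window size in tokens (formerly "max_tokens")
    supports_extended_thinking: bool = False


caps = ModelCapabilitiesSketch(
    model_name="gemini-2.5-pro-preview-06-05",
    friendly_name="Gemini",
    context_window=1_048_576,
)
print(f"{caps.friendly_name} context window: {caps.context_window:,} tokens")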

View File

@@ -14,12 +14,12 @@ class GeminiModelProvider(ModelProvider):
     # Model configurations
     SUPPORTED_MODELS = {
         "gemini-2.5-flash-preview-05-20": {
-            "max_tokens": 1_048_576,  # 1M tokens
+            "context_window": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
             "max_thinking_tokens": 24576,  # Flash 2.5 thinking budget limit
         },
         "gemini-2.5-pro-preview-06-05": {
-            "max_tokens": 1_048_576,  # 1M tokens
+            "context_window": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
             "max_thinking_tokens": 32768,  # Pro 2.5 thinking budget limit
         },
@@ -68,7 +68,7 @@ class GeminiModelProvider(ModelProvider):
             provider=ProviderType.GOOGLE,
             model_name=resolved_name,
             friendly_name="Gemini",
-            max_tokens=config["max_tokens"],
+            context_window=config["context_window"],
             supports_extended_thinking=config["supports_extended_thinking"],
             supports_system_prompts=True,
             supports_streaming=True,

View File

@@ -15,11 +15,11 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
     # Model configurations
     SUPPORTED_MODELS = {
         "o3": {
-            "max_tokens": 200_000,  # 200K tokens
+            "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
         },
         "o3-mini": {
-            "max_tokens": 200_000,  # 200K tokens
+            "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
         },
     }
@@ -49,7 +49,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             provider=ProviderType.OPENAI,
             model_name=model_name,
             friendly_name="OpenAI",
-            max_tokens=config["max_tokens"],
+            context_window=config["context_window"],
             supports_extended_thinking=config["supports_extended_thinking"],
             supports_system_prompts=True,
             supports_streaming=True,

View File

@@ -109,7 +109,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
             provider=ProviderType.OPENROUTER,
             model_name=resolved_name,
             friendly_name=self.FRIENDLY_NAME,
-            max_tokens=32_768,  # Conservative default context window
+            context_window=32_768,  # Conservative default context window
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

View File

@@ -30,7 +30,7 @@ class OpenRouterModelConfig:
             provider=ProviderType.OPENROUTER,
             model_name=self.model_name,
             friendly_name="OpenRouter",
-            max_tokens=self.context_window,  # ModelCapabilities still uses max_tokens
+            context_window=self.context_window,
             supports_extended_thinking=self.supports_extended_thinking,
             supports_system_prompts=self.supports_system_prompts,
             supports_streaming=self.supports_streaming,
@@ -103,10 +103,6 @@ class OpenRouterModelRegistry:
         # Parse models
         configs = []
         for model_data in data.get("models", []):
-            # Handle backwards compatibility - rename max_tokens to context_window
-            if "max_tokens" in model_data and "context_window" not in model_data:
-                model_data["context_window"] = model_data.pop("max_tokens")
             config = OpenRouterModelConfig(**model_data)
             configs.append(config)
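
Note: with the compatibility shim removed, custom model entries are expected to carry "context_window" directly. A standalone sketch of what that implies; the dataclass and model id below are hypothetical stand-ins, not the project's real OpenRouterModelConfig.

from dataclasses import dataclass


@dataclass
class ModelConfigSketch:
    model_name: str
    context_window: int
    supports_extended_thinking: bool = False


data = {
    "models": [
        # Entries must now use "context_window"; with this sketch's dataclass a
        # legacy "max_tokens" key would raise TypeError instead of being renamed.
        {"model_name": "vendor/some-model", "context_window": 32_768},
    ]
}

configs = [ModelConfigSketch(**m) for m in data.get("models", [])]
print(configs[0].context_window)  # 32768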

View File

@@ -33,7 +33,6 @@ from mcp.types import ServerCapabilities, TextContent, Tool, ToolsCapability
 from config import (
     DEFAULT_MODEL,
-    MAX_CONTEXT_TOKENS,
     __author__,
     __updated__,
     __version__,
@@ -521,7 +520,7 @@ async def handle_get_version() -> list[TextContent]:
"author": __author__, "author": __author__,
"default_model": DEFAULT_MODEL, "default_model": DEFAULT_MODEL,
"default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP, "default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
"max_context_tokens": f"{MAX_CONTEXT_TOKENS:,}", "max_context_tokens": "Dynamic (model-specific)",
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
"server_started": datetime.now().isoformat(), "server_started": datetime.now().isoformat(),
"available_tools": list(TOOLS.keys()) + ["get_version"], "available_tools": list(TOOLS.keys()) + ["get_version"],
@@ -547,7 +546,7 @@ Author: {__author__}
 Configuration:
 - Default Model: {DEFAULT_MODEL}
 - Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
-- Max Context: {MAX_CONTEXT_TOKENS:,} tokens
+- Max Context: Dynamic (model-specific)
 - Python: {version_info["python_version"]}
 - Started: {version_info["server_started"]}

View File

@@ -4,7 +4,6 @@ Tests for configuration
 from config import (
     DEFAULT_MODEL,
-    MAX_CONTEXT_TOKENS,
     TEMPERATURE_ANALYTICAL,
     TEMPERATURE_BALANCED,
     TEMPERATURE_CREATIVE,
@@ -33,7 +32,6 @@ class TestConfig:
"""Test model configuration""" """Test model configuration"""
# DEFAULT_MODEL is set in conftest.py for tests # DEFAULT_MODEL is set in conftest.py for tests
assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20" assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20"
assert MAX_CONTEXT_TOKENS == 1_000_000
def test_temperature_defaults(self): def test_temperature_defaults(self):
"""Test temperature constants""" """Test temperature constants"""

View File

@@ -22,7 +22,7 @@ from typing import Any, Literal, Optional
 from mcp.types import TextContent
 from pydantic import BaseModel, Field

-from config import MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
+from config import MCP_PROMPT_SIZE_LIMIT
 from providers import ModelProvider, ModelProviderRegistry
 from utils import check_token_limit
 from utils.conversation_memory import (
@@ -414,7 +414,7 @@ class BaseTool(ABC):
             request_files: List of files requested for current tool execution
             continuation_id: Thread continuation ID, or None for new conversations
             context_description: Description for token limit validation (e.g. "Code", "New files")
-            max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS)
+            max_tokens: Maximum tokens to use (defaults to remaining budget or model-specific content allocation)
             reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
             remaining_budget: Remaining token budget after conversation history (from server.py)
             arguments: Original tool arguments (used to extract _remaining_tokens if available)
@@ -473,17 +473,17 @@ class BaseTool(ABC):
                 capabilities = provider.get_capabilities(model_name)

                 # Calculate content allocation based on model capacity
-                if capabilities.max_tokens < 300_000:
+                if capabilities.context_window < 300_000:
                     # Smaller context models: 60% content, 40% response
-                    model_content_tokens = int(capabilities.max_tokens * 0.6)
+                    model_content_tokens = int(capabilities.context_window * 0.6)
                 else:
                     # Larger context models: 80% content, 20% response
-                    model_content_tokens = int(capabilities.max_tokens * 0.8)
+                    model_content_tokens = int(capabilities.context_window * 0.8)

                 effective_max_tokens = model_content_tokens - reserve_tokens
                 logger.debug(
                     f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
-                    f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total"
+                    f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
                 )
             except (ValueError, AttributeError) as e:
                 # Handle specific errors: provider not found, model not supported, missing attributes
@@ -491,17 +491,13 @@ class BaseTool(ABC):
f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}" f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
) )
# Fall back to conservative default for safety # Fall back to conservative default for safety
from config import MAX_CONTENT_TOKENS effective_max_tokens = 100_000 - reserve_tokens
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
except Exception as e: except Exception as e:
# Catch any other unexpected errors # Catch any other unexpected errors
logger.error( logger.error(
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}" f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
) )
from config import MAX_CONTENT_TOKENS effective_max_tokens = 100_000 - reserve_tokens
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
# Ensure we have a reasonable minimum budget # Ensure we have a reasonable minimum budget
effective_max_tokens = max(1000, effective_max_tokens) effective_max_tokens = max(1000, effective_max_tokens)
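
Note: a standalone worked example of the budget rule these two hunks implement. The helper below is illustrative, not the project's function; the thresholds and percentages come from the hunk above, and the 1K reserve default and 1,000-token floor from the surrounding lines.

def content_budget(context_window: int, reserve_tokens: int = 1_000) -> int:
    # 60% of the window for content below 300K tokens, 80% at or above it,
    # minus the reserve, with the same 1,000-token floor applied above.
    ratio = 0.6 if context_window < 300_000 else 0.8
    return max(1_000, int(context_window * ratio) - reserve_tokens)


print(content_budget(200_000))    # 119000  (smaller-context model, 60% split)
print(content_budget(1_048_576))  # 837860  (larger-context model, 80% split)
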
@@ -1233,7 +1229,7 @@ When recommending searches, be specific about what information you need and why
""" """
return response return response
def _validate_token_limit(self, text: str, context_type: str = "Context") -> None: def _validate_token_limit(self, text: str, context_type: str = "Context", context_window: int = 200_000) -> None:
""" """
Validate token limit and raise ValueError if exceeded. Validate token limit and raise ValueError if exceeded.
@@ -1243,14 +1239,15 @@ When recommending searches, be specific about what information you need and why
         Args:
             text: The text to check
             context_type: Description of what's being checked (for error message)
+            context_window: The model's context window size

         Raises:
-            ValueError: If text exceeds MAX_CONTEXT_TOKENS
+            ValueError: If text exceeds context_window
         """
-        within_limit, estimated_tokens = check_token_limit(text)
+        within_limit, estimated_tokens = check_token_limit(text, context_window)

         if not within_limit:
             raise ValueError(
-                f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {MAX_CONTEXT_TOKENS:,} tokens."
+                f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {context_window:,} tokens."
             )

     def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]:

View File

@@ -14,7 +14,8 @@ from typing import Any, Literal, Optional
 from mcp.types import TextContent
 from pydantic import Field

-from config import MAX_CONTEXT_TOKENS
+# Conservative fallback for token limits
+DEFAULT_CONTEXT_WINDOW = 200_000
 from prompts.tool_prompts import PRECOMMIT_PROMPT
 from utils.file_utils import translate_file_paths, translate_path_for_environment
 from utils.git_utils import find_git_repositories, get_git_status, run_git_command
@@ -186,7 +187,7 @@ class Precommit(BaseTool):
         all_diffs = []
         repo_summaries = []
         total_tokens = 0
-        max_tokens = MAX_CONTEXT_TOKENS - 50000  # Reserve tokens for prompt and response
+        max_tokens = DEFAULT_CONTEXT_WINDOW - 50000  # Reserve tokens for prompt and response

         for repo_path in repositories:
             repo_name = os.path.basename(repo_path) or "root"

View File

@@ -23,7 +23,7 @@ import os
 from pathlib import Path
 from typing import Optional

-from .token_utils import MAX_CONTEXT_TOKENS, estimate_tokens
+from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens

 logger = logging.getLogger(__name__)
@@ -508,14 +508,14 @@ def read_files(
     Args:
         file_paths: List of file or directory paths (absolute paths required)
         code: Optional direct code to include (prioritized over files)
-        max_tokens: Maximum tokens to use (defaults to MAX_CONTEXT_TOKENS)
+        max_tokens: Maximum tokens to use (defaults to DEFAULT_CONTEXT_WINDOW)
         reserve_tokens: Tokens to reserve for prompt and response (default 50K)

     Returns:
         str: All file contents formatted for AI consumption
     """
     if max_tokens is None:
-        max_tokens = MAX_CONTEXT_TOKENS
+        max_tokens = DEFAULT_CONTEXT_WINDOW

     logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
     logger.debug(

View File

@@ -72,7 +72,7 @@ class ModelContext:
         Returns:
             TokenAllocation with calculated budgets
         """
-        total_tokens = self.capabilities.max_tokens
+        total_tokens = self.capabilities.context_window

         # Dynamic allocation based on model capacity
         if total_tokens < 300_000:

View File

@@ -9,7 +9,8 @@ approximate. For production systems requiring precise token counts,
 consider using the actual tokenizer for the specific model.
 """

-from config import MAX_CONTEXT_TOKENS
+# Default fallback for token limit (conservative estimate)
+DEFAULT_CONTEXT_WINDOW = 200_000  # Conservative fallback for unknown models


 def estimate_tokens(text: str) -> int:
@@ -32,9 +33,9 @@ def estimate_tokens(text: str) -> int:
     return len(text) // 4


-def check_token_limit(text: str) -> tuple[bool, int]:
+def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
     """
-    Check if text exceeds the maximum token limit for Gemini models.
+    Check if text exceeds the specified token limit.

     This function is used to validate that prepared prompts will fit
     within the model's context window, preventing API errors and ensuring
@@ -42,11 +43,12 @@ def check_token_limit(text: str) -> tuple[bool, int]:
     Args:
         text: The text to check
+        context_window: The model's context window size (defaults to conservative fallback)

     Returns:
         Tuple[bool, int]: (is_within_limit, estimated_tokens)
-        - is_within_limit: True if the text fits within MAX_CONTEXT_TOKENS
+        - is_within_limit: True if the text fits within context_window
         - estimated_tokens: The estimated token count
     """
     estimated = estimate_tokens(text)
-    return estimated <= MAX_CONTEXT_TOKENS, estimated
+    return estimated <= context_window, estimated
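
Note: a small standalone sketch of how the reworked helper behaves with the new parameter. The function bodies mirror this diff; the sample prompt and the 1M figure are illustrative only.

DEFAULT_CONTEXT_WINDOW = 200_000  # conservative fallback, as above


def estimate_tokens(text: str) -> int:
    return len(text) // 4  # same rough 4-characters-per-token heuristic


def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
    estimated = estimate_tokens(text)
    return estimated <= context_window, estimated


prompt = "word " * 400_000  # ~500,000 estimated tokens
print(check_token_limit(prompt))                            # (False, 500000) against the 200K fallback
print(check_token_limit(prompt, context_window=1_048_576))  # (True, 500000) against a 1M-token window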