Merge remote-tracking branch 'upstream/main' into feat/comprehensive-project-improvements

This commit is contained in:
Patryk Ciechanski
2025-06-13 07:41:03 +02:00
20 changed files with 72 additions and 98 deletions

6
.gitignore vendored
View File

@@ -165,9 +165,3 @@ test_simulation_files/.claude/
# Temporary test directories # Temporary test directories
test-setup/ test-setup/
/test_simulation_files/**
# Remove Claude configuration
.mcp.json
Claude.md
memory-bank

View File

@@ -47,13 +47,6 @@ MODEL_CAPABILITIES_DESC = {
# - "o3" → "openai/gpt-4o" # - "o3" → "openai/gpt-4o"
# - "o3-mini" → "openai/gpt-4o-mini" # - "o3-mini" → "openai/gpt-4o-mini"
# Token allocation for Gemini Pro (1M total capacity)
# MAX_CONTEXT_TOKENS: Total model capacity
# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens total capacity for Gemini Pro
MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
RESPONSE_RESERVE_TOKENS = 200_000 # 200K tokens reserved for response generation
# Temperature defaults for different tool types # Temperature defaults for different tool types
# Temperature controls the randomness/creativity of model responses # Temperature controls the randomness/creativity of model responses

View File

@@ -105,7 +105,7 @@ class ModelCapabilities:
provider: ProviderType provider: ProviderType
model_name: str model_name: str
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI" friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
max_tokens: int context_window: int # Total context window size in tokens
supports_extended_thinking: bool = False supports_extended_thinking: bool = False
supports_system_prompts: bool = True supports_system_prompts: bool = True
supports_streaming: bool = True supports_streaming: bool = True

View File

@@ -14,12 +14,12 @@ class GeminiModelProvider(ModelProvider):
# Model configurations # Model configurations
SUPPORTED_MODELS = { SUPPORTED_MODELS = {
"gemini-2.5-flash-preview-05-20": { "gemini-2.5-flash-preview-05-20": {
"max_tokens": 1_048_576, # 1M tokens "context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": True, "supports_extended_thinking": True,
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit "max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
}, },
"gemini-2.5-pro-preview-06-05": { "gemini-2.5-pro-preview-06-05": {
"max_tokens": 1_048_576, # 1M tokens "context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": True, "supports_extended_thinking": True,
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit "max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
}, },
@@ -68,7 +68,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE, provider=ProviderType.GOOGLE,
model_name=resolved_name, model_name=resolved_name,
friendly_name="Gemini", friendly_name="Gemini",
max_tokens=config["max_tokens"], context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"], supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -15,11 +15,11 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
# Model configurations # Model configurations
SUPPORTED_MODELS = { SUPPORTED_MODELS = {
"o3": { "o3": {
"max_tokens": 200_000, # 200K tokens "context_window": 200_000, # 200K tokens
"supports_extended_thinking": False, "supports_extended_thinking": False,
}, },
"o3-mini": { "o3-mini": {
"max_tokens": 200_000, # 200K tokens "context_window": 200_000, # 200K tokens
"supports_extended_thinking": False, "supports_extended_thinking": False,
}, },
} }
@@ -49,7 +49,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI, provider=ProviderType.OPENAI,
model_name=model_name, model_name=model_name,
friendly_name="OpenAI", friendly_name="OpenAI",
max_tokens=config["max_tokens"], context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"], supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -109,7 +109,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENROUTER, provider=ProviderType.OPENROUTER,
model_name=resolved_name, model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME, friendly_name=self.FRIENDLY_NAME,
max_tokens=32_768, # Conservative default context window context_window=32_768, # Conservative default context window
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -30,7 +30,7 @@ class OpenRouterModelConfig:
provider=ProviderType.OPENROUTER, provider=ProviderType.OPENROUTER,
model_name=self.model_name, model_name=self.model_name,
friendly_name="OpenRouter", friendly_name="OpenRouter",
max_tokens=self.context_window, # ModelCapabilities still uses max_tokens context_window=self.context_window,
supports_extended_thinking=self.supports_extended_thinking, supports_extended_thinking=self.supports_extended_thinking,
supports_system_prompts=self.supports_system_prompts, supports_system_prompts=self.supports_system_prompts,
supports_streaming=self.supports_streaming, supports_streaming=self.supports_streaming,
@@ -103,10 +103,6 @@ class OpenRouterModelRegistry:
# Parse models # Parse models
configs = [] configs = []
for model_data in data.get("models", []): for model_data in data.get("models", []):
# Handle backwards compatibility - rename max_tokens to context_window
if "max_tokens" in model_data and "context_window" not in model_data:
model_data["context_window"] = model_data.pop("max_tokens")
config = OpenRouterModelConfig(**model_data) config = OpenRouterModelConfig(**model_data)
configs.append(config) configs.append(config)

View File

@@ -33,7 +33,6 @@ from mcp.types import ServerCapabilities, TextContent, Tool, ToolsCapability
from config import ( from config import (
DEFAULT_MODEL, DEFAULT_MODEL,
MAX_CONTEXT_TOKENS,
__author__, __author__,
__updated__, __updated__,
__version__, __version__,
@@ -158,24 +157,6 @@ def configure_providers():
has_openrouter = True has_openrouter = True
logger.info("OpenRouter API key found - Multiple models available via OpenRouter") logger.info("OpenRouter API key found - Multiple models available via OpenRouter")
# Check for conflicting configuration
if has_native_apis and has_openrouter:
logger.warning(
"\n" + "=" * 70 + "\n"
"WARNING: Both OpenRouter and native API keys detected!\n"
"\n"
"This creates ambiguity about which provider will be used for models\n"
"available through both APIs (e.g., 'o3' could come from OpenAI or OpenRouter).\n"
"\n"
"RECOMMENDATION: Use EITHER OpenRouter OR native APIs, not both.\n"
"\n"
"To fix this:\n"
"1. Use only OpenRouter: unset GEMINI_API_KEY and OPENAI_API_KEY\n"
"2. Use only native APIs: unset OPENROUTER_API_KEY\n"
"\n"
"Current configuration will prioritize native APIs over OpenRouter.\n" + "=" * 70 + "\n"
)
# Register providers - native APIs first to ensure they take priority # Register providers - native APIs first to ensure they take priority
if has_native_apis: if has_native_apis:
if gemini_key and gemini_key != "your_gemini_api_key_here": if gemini_key and gemini_key != "your_gemini_api_key_here":
@@ -539,7 +520,7 @@ async def handle_get_version() -> list[TextContent]:
"author": __author__, "author": __author__,
"default_model": DEFAULT_MODEL, "default_model": DEFAULT_MODEL,
"default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP, "default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
"max_context_tokens": f"{MAX_CONTEXT_TOKENS:,}", "max_context_tokens": "Dynamic (model-specific)",
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
"server_started": datetime.now().isoformat(), "server_started": datetime.now().isoformat(),
"available_tools": list(TOOLS.keys()) + ["get_version"], "available_tools": list(TOOLS.keys()) + ["get_version"],
@@ -565,7 +546,7 @@ Author: {__author__}
Configuration: Configuration:
- Default Model: {DEFAULT_MODEL} - Default Model: {DEFAULT_MODEL}
- Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP} - Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
- Max Context: {MAX_CONTEXT_TOKENS:,} tokens - Max Context: Dynamic (model-specific)
- Python: {version_info["python_version"]} - Python: {version_info["python_version"]}
- Started: {version_info["server_started"]} - Started: {version_info["server_started"]}

View File

@@ -11,6 +11,7 @@ Validates that:
4. Docker logs show deduplication behavior 4. Docker logs show deduplication behavior
""" """
import os
import subprocess import subprocess
from .base_test import BaseSimulatorTest from .base_test import BaseSimulatorTest
@@ -98,14 +99,17 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
# Setup test files # Setup test files
self.setup_test_files() self.setup_test_files()
# Create a short dummy file for quick testing # Create a short dummy file for quick testing in the current repo
dummy_content = """def add(a, b): dummy_content = """def add(a, b):
return a + b # Missing type hints return a + b # Missing type hints
def divide(x, y): def divide(x, y):
return x / y # No zero check return x / y # No zero check
""" """
dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content) # Create the file in the current git repo directory to make it show up in git status
dummy_file_path = os.path.join(os.getcwd(), "dummy_code.py")
with open(dummy_file_path, "w") as f:
f.write(dummy_content)
# Get timestamp for log filtering # Get timestamp for log filtering
import datetime import datetime
@@ -162,7 +166,10 @@ def divide(x, y):
def subtract(a, b): def subtract(a, b):
return a - b return a - b
""" """
new_file_path = self.create_additional_test_file("new_feature.py", new_file_content) # Create another temp file in the current repo for git changes
new_file_path = os.path.join(os.getcwd(), "new_feature.py")
with open(new_file_path, "w") as f:
f.write(new_file_content)
# Continue precommit with both files # Continue precommit with both files
continue_params = { continue_params = {
@@ -249,4 +256,11 @@ def subtract(a, b):
self.logger.error(f"File deduplication workflow test failed: {e}") self.logger.error(f"File deduplication workflow test failed: {e}")
return False return False
finally: finally:
# Clean up temp files created in current repo
temp_files = ["dummy_code.py", "new_feature.py"]
for temp_file in temp_files:
temp_path = os.path.join(os.getcwd(), temp_file)
if os.path.exists(temp_path):
os.remove(temp_path)
self.logger.debug(f"Removed temp file: {temp_path}")
self.cleanup_test_files() self.cleanup_test_files()

View File

@@ -5,7 +5,7 @@ from unittest.mock import Mock
from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", max_tokens=1_048_576): def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", context_window=1_048_576):
"""Create a properly configured mock provider.""" """Create a properly configured mock provider."""
mock_provider = Mock() mock_provider = Mock()
@@ -14,7 +14,7 @@ def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", max_tokens
provider=ProviderType.GOOGLE, provider=ProviderType.GOOGLE,
model_name=model_name, model_name=model_name,
friendly_name="Gemini", friendly_name="Gemini",
max_tokens=max_tokens, context_window=context_window,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -4,7 +4,6 @@ Tests for configuration
from config import ( from config import (
DEFAULT_MODEL, DEFAULT_MODEL,
MAX_CONTEXT_TOKENS,
TEMPERATURE_ANALYTICAL, TEMPERATURE_ANALYTICAL,
TEMPERATURE_BALANCED, TEMPERATURE_BALANCED,
TEMPERATURE_CREATIVE, TEMPERATURE_CREATIVE,
@@ -33,7 +32,6 @@ class TestConfig:
"""Test model configuration""" """Test model configuration"""
# DEFAULT_MODEL is set in conftest.py for tests # DEFAULT_MODEL is set in conftest.py for tests
assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20" assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20"
assert MAX_CONTEXT_TOKENS == 1_000_000
def test_temperature_defaults(self): def test_temperature_defaults(self):
"""Test temperature constants""" """Test temperature constants"""

View File

@@ -77,7 +77,7 @@ async def test_conversation_history_field_mapping():
provider=ProviderType.GOOGLE, provider=ProviderType.GOOGLE,
model_name="gemini-2.5-flash-preview-05-20", model_name="gemini-2.5-flash-preview-05-20",
friendly_name="Gemini", friendly_name="Gemini",
max_tokens=200000, context_window=200000,
supports_extended_thinking=True, supports_extended_thinking=True,
) )
mock_get_provider.return_value = mock_provider mock_get_provider.return_value = mock_provider

View File

@@ -61,7 +61,7 @@ class TestOpenRouterProvider:
caps = provider.get_capabilities("unknown-model") caps = provider.get_capabilities("unknown-model")
assert caps.provider == ProviderType.OPENROUTER assert caps.provider == ProviderType.OPENROUTER
assert caps.model_name == "unknown-model" assert caps.model_name == "unknown-model"
assert caps.max_tokens == 32_768 # Safe default assert caps.context_window == 32_768 # Safe default
assert hasattr(caps, "_is_generic") and caps._is_generic is True assert hasattr(caps, "_is_generic") and caps._is_generic is True
def test_model_alias_resolution(self): def test_model_alias_resolution(self):
@@ -139,7 +139,7 @@ class TestOpenRouterRegistry:
caps = registry.get_capabilities("opus") caps = registry.get_capabilities("opus")
assert caps is not None assert caps is not None
assert caps.model_name == "anthropic/claude-3-opus" assert caps.model_name == "anthropic/claude-3-opus"
assert caps.max_tokens == 200000 # Claude's context window assert caps.context_window == 200000 # Claude's context window
# Test using full model name # Test using full model name
caps = registry.get_capabilities("anthropic/claude-3-opus") caps = registry.get_capabilities("anthropic/claude-3-opus")

View File

@@ -120,7 +120,7 @@ class TestOpenRouterModelRegistry:
assert caps.provider == ProviderType.OPENROUTER assert caps.provider == ProviderType.OPENROUTER
assert caps.model_name == "anthropic/claude-3-opus" assert caps.model_name == "anthropic/claude-3-opus"
assert caps.friendly_name == "OpenRouter" assert caps.friendly_name == "OpenRouter"
assert caps.max_tokens == 200000 assert caps.context_window == 200000
assert not caps.supports_extended_thinking assert not caps.supports_extended_thinking
def test_duplicate_alias_detection(self): def test_duplicate_alias_detection(self):
@@ -147,13 +147,13 @@ class TestOpenRouterModelRegistry:
os.unlink(temp_path) os.unlink(temp_path)
def test_backwards_compatibility_max_tokens(self): def test_backwards_compatibility_max_tokens(self):
"""Test backwards compatibility with old max_tokens field.""" """Test that old max_tokens field is no longer supported (should result in empty registry)."""
config_data = { config_data = {
"models": [ "models": [
{ {
"model_name": "test/old-model", "model_name": "test/old-model",
"aliases": ["old"], "aliases": ["old"],
"max_tokens": 16384, # Old field name "max_tokens": 16384, # Old field name should cause error
"supports_extended_thinking": False, "supports_extended_thinking": False,
} }
] ]
@@ -164,15 +164,12 @@ class TestOpenRouterModelRegistry:
temp_path = f.name temp_path = f.name
try: try:
# Should gracefully handle the error and result in empty registry
registry = OpenRouterModelRegistry(config_path=temp_path) registry = OpenRouterModelRegistry(config_path=temp_path)
config = registry.resolve("old") # Registry should be empty due to config error
assert len(registry.list_models()) == 0
assert config is not None assert len(registry.list_aliases()) == 0
assert config.context_window == 16384 # Should be converted assert registry.resolve("old") is None
# Check capabilities still work
caps = config.to_capabilities()
assert caps.max_tokens == 16384
finally: finally:
os.unlink(temp_path) os.unlink(temp_path)
@@ -215,7 +212,7 @@ class TestOpenRouterModelRegistry:
) )
caps = config.to_capabilities() caps = config.to_capabilities()
assert caps.max_tokens == 128000 assert caps.context_window == 128000
assert caps.supports_extended_thinking assert caps.supports_extended_thinking
assert caps.supports_system_prompts assert caps.supports_system_prompts
assert caps.supports_streaming assert caps.supports_streaming

View File

@@ -84,7 +84,7 @@ class TestGeminiProvider:
assert capabilities.provider == ProviderType.GOOGLE assert capabilities.provider == ProviderType.GOOGLE
assert capabilities.model_name == "gemini-2.5-flash-preview-05-20" assert capabilities.model_name == "gemini-2.5-flash-preview-05-20"
assert capabilities.max_tokens == 1_048_576 assert capabilities.context_window == 1_048_576
assert capabilities.supports_extended_thinking assert capabilities.supports_extended_thinking
def test_get_capabilities_pro_model(self): def test_get_capabilities_pro_model(self):
@@ -165,7 +165,7 @@ class TestOpenAIProvider:
assert capabilities.provider == ProviderType.OPENAI assert capabilities.provider == ProviderType.OPENAI
assert capabilities.model_name == "o3-mini" assert capabilities.model_name == "o3-mini"
assert capabilities.max_tokens == 200_000 assert capabilities.context_window == 200_000
assert not capabilities.supports_extended_thinking assert not capabilities.supports_extended_thinking
def test_validate_model_names(self): def test_validate_model_names(self):

View File

@@ -22,7 +22,7 @@ from typing import Any, Literal, Optional
from mcp.types import TextContent from mcp.types import TextContent
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from config import MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT from config import MCP_PROMPT_SIZE_LIMIT
from providers import ModelProvider, ModelProviderRegistry from providers import ModelProvider, ModelProviderRegistry
from utils import check_token_limit from utils import check_token_limit
from utils.conversation_memory import ( from utils.conversation_memory import (
@@ -414,7 +414,7 @@ class BaseTool(ABC):
request_files: List of files requested for current tool execution request_files: List of files requested for current tool execution
continuation_id: Thread continuation ID, or None for new conversations continuation_id: Thread continuation ID, or None for new conversations
context_description: Description for token limit validation (e.g. "Code", "New files") context_description: Description for token limit validation (e.g. "Code", "New files")
max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS) max_tokens: Maximum tokens to use (defaults to remaining budget or model-specific content allocation)
reserve_tokens: Tokens to reserve for additional prompt content (default 1K) reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
remaining_budget: Remaining token budget after conversation history (from server.py) remaining_budget: Remaining token budget after conversation history (from server.py)
arguments: Original tool arguments (used to extract _remaining_tokens if available) arguments: Original tool arguments (used to extract _remaining_tokens if available)
@@ -473,17 +473,17 @@ class BaseTool(ABC):
capabilities = provider.get_capabilities(model_name) capabilities = provider.get_capabilities(model_name)
# Calculate content allocation based on model capacity # Calculate content allocation based on model capacity
if capabilities.max_tokens < 300_000: if capabilities.context_window < 300_000:
# Smaller context models: 60% content, 40% response # Smaller context models: 60% content, 40% response
model_content_tokens = int(capabilities.max_tokens * 0.6) model_content_tokens = int(capabilities.context_window * 0.6)
else: else:
# Larger context models: 80% content, 20% response # Larger context models: 80% content, 20% response
model_content_tokens = int(capabilities.max_tokens * 0.8) model_content_tokens = int(capabilities.context_window * 0.8)
effective_max_tokens = model_content_tokens - reserve_tokens effective_max_tokens = model_content_tokens - reserve_tokens
logger.debug( logger.debug(
f"[FILES] {self.name}: Using model-specific limit for {model_name}: " f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total" f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
) )
except (ValueError, AttributeError) as e: except (ValueError, AttributeError) as e:
# Handle specific errors: provider not found, model not supported, missing attributes # Handle specific errors: provider not found, model not supported, missing attributes
@@ -491,17 +491,13 @@ class BaseTool(ABC):
f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}" f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
) )
# Fall back to conservative default for safety # Fall back to conservative default for safety
from config import MAX_CONTENT_TOKENS effective_max_tokens = 100_000 - reserve_tokens
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
except Exception as e: except Exception as e:
# Catch any other unexpected errors # Catch any other unexpected errors
logger.error( logger.error(
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}" f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
) )
from config import MAX_CONTENT_TOKENS effective_max_tokens = 100_000 - reserve_tokens
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
# Ensure we have a reasonable minimum budget # Ensure we have a reasonable minimum budget
effective_max_tokens = max(1000, effective_max_tokens) effective_max_tokens = max(1000, effective_max_tokens)
@@ -1233,7 +1229,7 @@ When recommending searches, be specific about what information you need and why
""" """
return response return response
def _validate_token_limit(self, text: str, context_type: str = "Context") -> None: def _validate_token_limit(self, text: str, context_type: str = "Context", context_window: int = 200_000) -> None:
""" """
Validate token limit and raise ValueError if exceeded. Validate token limit and raise ValueError if exceeded.
@@ -1243,14 +1239,15 @@ When recommending searches, be specific about what information you need and why
Args: Args:
text: The text to check text: The text to check
context_type: Description of what's being checked (for error message) context_type: Description of what's being checked (for error message)
context_window: The model's context window size
Raises: Raises:
ValueError: If text exceeds MAX_CONTEXT_TOKENS ValueError: If text exceeds context_window
""" """
within_limit, estimated_tokens = check_token_limit(text) within_limit, estimated_tokens = check_token_limit(text, context_window)
if not within_limit: if not within_limit:
raise ValueError( raise ValueError(
f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {MAX_CONTEXT_TOKENS:,} tokens." f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {context_window:,} tokens."
) )
def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]: def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]:

View File

@@ -14,7 +14,6 @@ from typing import Any, Literal, Optional
from mcp.types import TextContent from mcp.types import TextContent
from pydantic import Field from pydantic import Field
from config import MAX_CONTEXT_TOKENS
from prompts.tool_prompts import PRECOMMIT_PROMPT from prompts.tool_prompts import PRECOMMIT_PROMPT
from utils.file_utils import translate_file_paths, translate_path_for_environment from utils.file_utils import translate_file_paths, translate_path_for_environment
from utils.git_utils import find_git_repositories, get_git_status, run_git_command from utils.git_utils import find_git_repositories, get_git_status, run_git_command
@@ -23,6 +22,9 @@ from utils.token_utils import estimate_tokens
from .base import BaseTool, ToolRequest from .base import BaseTool, ToolRequest
from .models import ToolOutput from .models import ToolOutput
# Conservative fallback for token limits
DEFAULT_CONTEXT_WINDOW = 200_000
class PrecommitRequest(ToolRequest): class PrecommitRequest(ToolRequest):
"""Request model for precommit tool""" """Request model for precommit tool"""
@@ -186,7 +188,7 @@ class Precommit(BaseTool):
all_diffs = [] all_diffs = []
repo_summaries = [] repo_summaries = []
total_tokens = 0 total_tokens = 0
max_tokens = MAX_CONTEXT_TOKENS - 50000 # Reserve tokens for prompt and response max_tokens = DEFAULT_CONTEXT_WINDOW - 50000 # Reserve tokens for prompt and response
for repo_path in repositories: for repo_path in repositories:
repo_name = os.path.basename(repo_path) or "root" repo_name = os.path.basename(repo_path) or "root"

View File

@@ -23,7 +23,7 @@ import os
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from .token_utils import MAX_CONTEXT_TOKENS, estimate_tokens from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -508,14 +508,14 @@ def read_files(
Args: Args:
file_paths: List of file or directory paths (absolute paths required) file_paths: List of file or directory paths (absolute paths required)
code: Optional direct code to include (prioritized over files) code: Optional direct code to include (prioritized over files)
max_tokens: Maximum tokens to use (defaults to MAX_CONTEXT_TOKENS) max_tokens: Maximum tokens to use (defaults to DEFAULT_CONTEXT_WINDOW)
reserve_tokens: Tokens to reserve for prompt and response (default 50K) reserve_tokens: Tokens to reserve for prompt and response (default 50K)
Returns: Returns:
str: All file contents formatted for AI consumption str: All file contents formatted for AI consumption
""" """
if max_tokens is None: if max_tokens is None:
max_tokens = MAX_CONTEXT_TOKENS max_tokens = DEFAULT_CONTEXT_WINDOW
logger.debug(f"[FILES] read_files called with {len(file_paths)} paths") logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
logger.debug( logger.debug(

View File

@@ -72,7 +72,7 @@ class ModelContext:
Returns: Returns:
TokenAllocation with calculated budgets TokenAllocation with calculated budgets
""" """
total_tokens = self.capabilities.max_tokens total_tokens = self.capabilities.context_window
# Dynamic allocation based on model capacity # Dynamic allocation based on model capacity
if total_tokens < 300_000: if total_tokens < 300_000:

View File

@@ -9,7 +9,8 @@ approximate. For production systems requiring precise token counts,
consider using the actual tokenizer for the specific model. consider using the actual tokenizer for the specific model.
""" """
from config import MAX_CONTEXT_TOKENS # Default fallback for token limit (conservative estimate)
DEFAULT_CONTEXT_WINDOW = 200_000 # Conservative fallback for unknown models
def estimate_tokens(text: str) -> int: def estimate_tokens(text: str) -> int:
@@ -32,9 +33,9 @@ def estimate_tokens(text: str) -> int:
return len(text) // 4 return len(text) // 4
def check_token_limit(text: str) -> tuple[bool, int]: def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
""" """
Check if text exceeds the maximum token limit for Gemini models. Check if text exceeds the specified token limit.
This function is used to validate that prepared prompts will fit This function is used to validate that prepared prompts will fit
within the model's context window, preventing API errors and ensuring within the model's context window, preventing API errors and ensuring
@@ -42,11 +43,12 @@ def check_token_limit(text: str) -> tuple[bool, int]:
Args: Args:
text: The text to check text: The text to check
context_window: The model's context window size (defaults to conservative fallback)
Returns: Returns:
Tuple[bool, int]: (is_within_limit, estimated_tokens) Tuple[bool, int]: (is_within_limit, estimated_tokens)
- is_within_limit: True if the text fits within MAX_CONTEXT_TOKENS - is_within_limit: True if the text fits within context_window
- estimated_tokens: The estimated token count - estimated_tokens: The estimated token count
""" """
estimated = estimate_tokens(text) estimated = estimate_tokens(text)
return estimated <= MAX_CONTEXT_TOKENS, estimated return estimated <= context_window, estimated