From e2762c4ed039f8e4b73f41b08b82f6e9d51f4f48 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Fri, 13 Jun 2025 08:58:35 +0400
Subject: [PATCH 1/3] Allow multiple provider APIs to be used together

---
 server.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/server.py b/server.py
index 541ae23..669145c 100644
--- a/server.py
+++ b/server.py
@@ -158,24 +158,6 @@ def configure_providers():
         has_openrouter = True
         logger.info("OpenRouter API key found - Multiple models available via OpenRouter")
 
-    # Check for conflicting configuration
-    if has_native_apis and has_openrouter:
-        logger.warning(
-            "\n" + "=" * 70 + "\n"
-            "WARNING: Both OpenRouter and native API keys detected!\n"
-            "\n"
-            "This creates ambiguity about which provider will be used for models\n"
-            "available through both APIs (e.g., 'o3' could come from OpenAI or OpenRouter).\n"
-            "\n"
-            "RECOMMENDATION: Use EITHER OpenRouter OR native APIs, not both.\n"
-            "\n"
-            "To fix this:\n"
-            "1. Use only OpenRouter: unset GEMINI_API_KEY and OPENAI_API_KEY\n"
-            "2. Use only native APIs: unset OPENROUTER_API_KEY\n"
-            "\n"
-            "Current configuration will prioritize native APIs over OpenRouter.\n" + "=" * 70 + "\n"
-        )
-
     # Register providers - native APIs first to ensure they take priority
     if has_native_apis:
         if gemini_key and gemini_key != "your_gemini_api_key_here":

From b16f85979bee213e0b4334e58b399c4718ea17b6 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Fri, 13 Jun 2025 09:06:12 +0400
Subject: [PATCH 2/3] Use consistent terminology

---
 config.py                        |  7 -------
 providers/base.py                |  2 +-
 providers/gemini.py              |  6 +++---
 providers/openai.py              |  6 +++---
 providers/openrouter.py          |  2 +-
 providers/openrouter_registry.py |  6 +-----
 server.py                        |  5 ++---
 tests/test_config.py             |  2 --
 tools/base.py                    | 29 +++++++++++++----------------
 tools/precommit.py               |  5 +++--
 utils/file_utils.py              |  6 +++---
 utils/model_context.py           |  2 +-
 utils/token_utils.py             | 12 +++++++-----
 13 files changed, 38 insertions(+), 52 deletions(-)

diff --git a/config.py b/config.py
index cebc4ab..99e0a75 100644
--- a/config.py
+++ b/config.py
@@ -47,13 +47,6 @@ MODEL_CAPABILITIES_DESC = {
 # - "o3" → "openai/gpt-4o"
 # - "o3-mini" → "openai/gpt-4o-mini"
 
-# Token allocation for Gemini Pro (1M total capacity)
-# MAX_CONTEXT_TOKENS: Total model capacity
-# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
-# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
-MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens total capacity for Gemini Pro
-MAX_CONTENT_TOKENS = 800_000  # 800K tokens for content (prompts + files + history)
-RESPONSE_RESERVE_TOKENS = 200_000  # 200K tokens reserved for response generation
 
 # Temperature defaults for different tool types
 # Temperature controls the randomness/creativity of model responses
diff --git a/providers/base.py b/providers/base.py
index 0908fd1..5ef1c25 100644
--- a/providers/base.py
+++ b/providers/base.py
@@ -105,7 +105,7 @@ class ModelCapabilities:
     provider: ProviderType
     model_name: str
     friendly_name: str  # Human-friendly name like "Gemini" or "OpenAI"
-    max_tokens: int
+    context_window: int  # Total context window size in tokens
     supports_extended_thinking: bool = False
     supports_system_prompts: bool = True
     supports_streaming: bool = True
diff --git a/providers/gemini.py b/providers/gemini.py
index 5fe435e..588ad2b 100644
--- a/providers/gemini.py
+++ b/providers/gemini.py
@@ -14,12 +14,12 @@ class GeminiModelProvider(ModelProvider):
     # Model configurations
     SUPPORTED_MODELS = {
         "gemini-2.5-flash-preview-05-20": {
-            "max_tokens": 1_048_576,  # 1M tokens
+            "context_window": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
             "max_thinking_tokens": 24576,  # Flash 2.5 thinking budget limit
         },
         "gemini-2.5-pro-preview-06-05": {
-            "max_tokens": 1_048_576,  # 1M tokens
+            "context_window": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
             "max_thinking_tokens": 32768,  # Pro 2.5 thinking budget limit
         },
@@ -68,7 +68,7 @@ class GeminiModelProvider(ModelProvider):
             provider=ProviderType.GOOGLE,
             model_name=resolved_name,
             friendly_name="Gemini",
-            max_tokens=config["max_tokens"],
+            context_window=config["context_window"],
             supports_extended_thinking=config["supports_extended_thinking"],
             supports_system_prompts=True,
             supports_streaming=True,
diff --git a/providers/openai.py b/providers/openai.py
index e1875de..9284ff0 100644
--- a/providers/openai.py
+++ b/providers/openai.py
@@ -15,11 +15,11 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
     # Model configurations
     SUPPORTED_MODELS = {
         "o3": {
-            "max_tokens": 200_000,  # 200K tokens
+            "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
         },
         "o3-mini": {
-            "max_tokens": 200_000,  # 200K tokens
+            "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
         },
     }
@@ -49,7 +49,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             provider=ProviderType.OPENAI,
             model_name=model_name,
             friendly_name="OpenAI",
-            max_tokens=config["max_tokens"],
+            context_window=config["context_window"],
             supports_extended_thinking=config["supports_extended_thinking"],
             supports_system_prompts=True,
             supports_streaming=True,
diff --git a/providers/openrouter.py b/providers/openrouter.py
index e82d258..fb55bc9 100644
--- a/providers/openrouter.py
+++ b/providers/openrouter.py
@@ -109,7 +109,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
             provider=ProviderType.OPENROUTER,
             model_name=resolved_name,
             friendly_name=self.FRIENDLY_NAME,
-            max_tokens=32_768,  # Conservative default context window
+            context_window=32_768,  # Conservative default context window
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,
diff --git a/providers/openrouter_registry.py b/providers/openrouter_registry.py
index 2172fcb..fa3f246 100644
--- a/providers/openrouter_registry.py
+++ b/providers/openrouter_registry.py
@@ -30,7 +30,7 @@ class OpenRouterModelConfig:
             provider=ProviderType.OPENROUTER,
             model_name=self.model_name,
             friendly_name="OpenRouter",
-            max_tokens=self.context_window,  # ModelCapabilities still uses max_tokens
+            context_window=self.context_window,
             supports_extended_thinking=self.supports_extended_thinking,
             supports_system_prompts=self.supports_system_prompts,
             supports_streaming=self.supports_streaming,
@@ -103,10 +103,6 @@ class OpenRouterModelRegistry:
         # Parse models
         configs = []
         for model_data in data.get("models", []):
-            # Handle backwards compatibility - rename max_tokens to context_window
-            if "max_tokens" in model_data and "context_window" not in model_data:
-                model_data["context_window"] = model_data.pop("max_tokens")
-
             config = OpenRouterModelConfig(**model_data)
             configs.append(config)
 
diff --git a/server.py b/server.py
index 669145c..64b475b 100644
--- a/server.py
+++ b/server.py
@@ -33,7 +33,6 @@ from mcp.types import ServerCapabilities, TextContent, Tool, ToolsCapability
 
 from config import (
     DEFAULT_MODEL,
-    MAX_CONTEXT_TOKENS,
     __author__,
     __updated__,
     __version__,
@@ -521,7 +520,7 @@ async def handle_get_version() -> list[TextContent]:
         "author": __author__,
         "default_model": DEFAULT_MODEL,
         "default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
-        "max_context_tokens": f"{MAX_CONTEXT_TOKENS:,}",
+        "max_context_tokens": "Dynamic (model-specific)",
         "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
         "server_started": datetime.now().isoformat(),
         "available_tools": list(TOOLS.keys()) + ["get_version"],
@@ -547,7 +546,7 @@ Author: {__author__}
 Configuration:
 - Default Model: {DEFAULT_MODEL}
 - Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
-- Max Context: {MAX_CONTEXT_TOKENS:,} tokens
+- Max Context: Dynamic (model-specific)
 - Python: {version_info["python_version"]}
 - Started: {version_info["server_started"]}
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 6220226..e6ad23b 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -4,7 +4,6 @@ Tests for configuration
 
 from config import (
     DEFAULT_MODEL,
-    MAX_CONTEXT_TOKENS,
     TEMPERATURE_ANALYTICAL,
     TEMPERATURE_BALANCED,
     TEMPERATURE_CREATIVE,
@@ -33,7 +32,6 @@ class TestConfig:
         """Test model configuration"""
         # DEFAULT_MODEL is set in conftest.py for tests
         assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20"
-        assert MAX_CONTEXT_TOKENS == 1_000_000
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
diff --git a/tools/base.py b/tools/base.py
index 28fc342..12b4812 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -22,7 +22,7 @@ from typing import Any, Literal, Optional
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
-from config import MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
+from config import MCP_PROMPT_SIZE_LIMIT
 from providers import ModelProvider, ModelProviderRegistry
 from utils import check_token_limit
 from utils.conversation_memory import (
@@ -414,7 +414,7 @@ class BaseTool(ABC):
             request_files: List of files requested for current tool execution
             continuation_id: Thread continuation ID, or None for new conversations
             context_description: Description for token limit validation (e.g. "Code", "New files")
"Code", "New files") - max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS) + max_tokens: Maximum tokens to use (defaults to remaining budget or model-specific content allocation) reserve_tokens: Tokens to reserve for additional prompt content (default 1K) remaining_budget: Remaining token budget after conversation history (from server.py) arguments: Original tool arguments (used to extract _remaining_tokens if available) @@ -473,17 +473,17 @@ class BaseTool(ABC): capabilities = provider.get_capabilities(model_name) # Calculate content allocation based on model capacity - if capabilities.max_tokens < 300_000: + if capabilities.context_window < 300_000: # Smaller context models: 60% content, 40% response - model_content_tokens = int(capabilities.max_tokens * 0.6) + model_content_tokens = int(capabilities.context_window * 0.6) else: # Larger context models: 80% content, 20% response - model_content_tokens = int(capabilities.max_tokens * 0.8) + model_content_tokens = int(capabilities.context_window * 0.8) effective_max_tokens = model_content_tokens - reserve_tokens logger.debug( f"[FILES] {self.name}: Using model-specific limit for {model_name}: " - f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total" + f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total" ) except (ValueError, AttributeError) as e: # Handle specific errors: provider not found, model not supported, missing attributes @@ -491,17 +491,13 @@ class BaseTool(ABC): f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}" ) # Fall back to conservative default for safety - from config import MAX_CONTENT_TOKENS - - effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens + effective_max_tokens = 100_000 - reserve_tokens except Exception as e: # Catch any other unexpected errors logger.error( f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}" ) - from config import MAX_CONTENT_TOKENS - - effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens + effective_max_tokens = 100_000 - reserve_tokens # Ensure we have a reasonable minimum budget effective_max_tokens = max(1000, effective_max_tokens) @@ -1233,7 +1229,7 @@ When recommending searches, be specific about what information you need and why """ return response - def _validate_token_limit(self, text: str, context_type: str = "Context") -> None: + def _validate_token_limit(self, text: str, context_type: str = "Context", context_window: int = 200_000) -> None: """ Validate token limit and raise ValueError if exceeded. @@ -1243,14 +1239,15 @@ When recommending searches, be specific about what information you need and why Args: text: The text to check context_type: Description of what's being checked (for error message) + context_window: The model's context window size Raises: - ValueError: If text exceeds MAX_CONTEXT_TOKENS + ValueError: If text exceeds context_window """ - within_limit, estimated_tokens = check_token_limit(text) + within_limit, estimated_tokens = check_token_limit(text, context_window) if not within_limit: raise ValueError( - f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {MAX_CONTEXT_TOKENS:,} tokens." + f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {context_window:,} tokens." 
             )
 
     def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]:
diff --git a/tools/precommit.py b/tools/precommit.py
index 87ea5a5..23bdb99 100644
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -14,7 +14,8 @@ from typing import Any, Literal, Optional
 from mcp.types import TextContent
 from pydantic import Field
 
-from config import MAX_CONTEXT_TOKENS
+# Conservative fallback for token limits
+DEFAULT_CONTEXT_WINDOW = 200_000
 from prompts.tool_prompts import PRECOMMIT_PROMPT
 from utils.file_utils import translate_file_paths, translate_path_for_environment
 from utils.git_utils import find_git_repositories, get_git_status, run_git_command
@@ -186,7 +187,7 @@ class Precommit(BaseTool):
         all_diffs = []
         repo_summaries = []
         total_tokens = 0
-        max_tokens = MAX_CONTEXT_TOKENS - 50000  # Reserve tokens for prompt and response
+        max_tokens = DEFAULT_CONTEXT_WINDOW - 50000  # Reserve tokens for prompt and response
 
         for repo_path in repositories:
             repo_name = os.path.basename(repo_path) or "root"
diff --git a/utils/file_utils.py b/utils/file_utils.py
index 8d986c4..fb28c36 100644
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -23,7 +23,7 @@ import os
 from pathlib import Path
 from typing import Optional
 
-from .token_utils import MAX_CONTEXT_TOKENS, estimate_tokens
+from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens
 
 logger = logging.getLogger(__name__)
 
@@ -508,14 +508,14 @@ def read_files(
     Args:
         file_paths: List of file or directory paths (absolute paths required)
         code: Optional direct code to include (prioritized over files)
-        max_tokens: Maximum tokens to use (defaults to MAX_CONTEXT_TOKENS)
+        max_tokens: Maximum tokens to use (defaults to DEFAULT_CONTEXT_WINDOW)
         reserve_tokens: Tokens to reserve for prompt and response (default 50K)
 
     Returns:
         str: All file contents formatted for AI consumption
     """
     if max_tokens is None:
-        max_tokens = MAX_CONTEXT_TOKENS
+        max_tokens = DEFAULT_CONTEXT_WINDOW
 
     logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
     logger.debug(
diff --git a/utils/model_context.py b/utils/model_context.py
index 766d0f8..1055172 100644
--- a/utils/model_context.py
+++ b/utils/model_context.py
@@ -72,7 +72,7 @@ class ModelContext:
         Returns:
             TokenAllocation with calculated budgets
         """
-        total_tokens = self.capabilities.max_tokens
+        total_tokens = self.capabilities.context_window
 
         # Dynamic allocation based on model capacity
         if total_tokens < 300_000:
diff --git a/utils/token_utils.py b/utils/token_utils.py
index 2ea7fa9..393669e 100644
--- a/utils/token_utils.py
+++ b/utils/token_utils.py
@@ -9,7 +9,8 @@ approximate. For production systems requiring precise token counts,
 consider using the actual tokenizer for the specific model.
 """
 
-from config import MAX_CONTEXT_TOKENS
+# Default fallback for token limit (conservative estimate)
+DEFAULT_CONTEXT_WINDOW = 200_000  # Conservative fallback for unknown models
 
 
 def estimate_tokens(text: str) -> int:
@@ -32,9 +33,9 @@ def estimate_tokens(text: str) -> int:
     return len(text) // 4
 
 
-def check_token_limit(text: str) -> tuple[bool, int]:
+def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
     """
-    Check if text exceeds the maximum token limit for Gemini models.
+    Check if text exceeds the specified token limit.
 
     This function is used to validate that prepared prompts will fit
     within the model's context window, preventing API errors and ensuring
@@ -42,11 +43,12 @@
 
     Args:
         text: The text to check
+        context_window: The model's context window size (defaults to conservative fallback)
 
     Returns:
         Tuple[bool, int]: (is_within_limit, estimated_tokens)
-        - is_within_limit: True if the text fits within MAX_CONTEXT_TOKENS
+        - is_within_limit: True if the text fits within context_window
        - estimated_tokens: The estimated token count
     """
     estimated = estimate_tokens(text)
-    return estimated <= MAX_CONTEXT_TOKENS, estimated
+    return estimated <= context_window, estimated

From a641159a676b645e524948a5f49eb8ab6b261379 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Fri, 13 Jun 2025 09:28:33 +0400
Subject: [PATCH 3/3] Use consistent terminology

Remove the test folder from .gitignore so the live simulation test passes.

---
 .gitignore                                |  1 -
 .../test_per_tool_deduplication.py        | 20 +++++++++++++++---
 tests/mock_helpers.py                     |  4 ++--
 tests/test_conversation_field_mapping.py  |  2 +-
 tests/test_openrouter_provider.py         |  4 ++--
 tests/test_openrouter_registry.py         | 21 ++++++++-----------
 tests/test_providers.py                   |  4 ++--
 tools/precommit.py                        |  5 +++--
 8 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/.gitignore b/.gitignore
index e936c0a..ceb055a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,4 +165,3 @@ test_simulation_files/.claude/
 
 # Temporary test directories
 test-setup/
-/test_simulation_files/**
diff --git a/simulator_tests/test_per_tool_deduplication.py b/simulator_tests/test_per_tool_deduplication.py
index 4d6b55d..d5814a8 100644
--- a/simulator_tests/test_per_tool_deduplication.py
+++ b/simulator_tests/test_per_tool_deduplication.py
@@ -11,6 +11,7 @@ Validates that:
 4. Docker logs show deduplication behavior
 """
 
+import os
 import subprocess
 
 from .base_test import BaseSimulatorTest
@@ -98,14 +99,17 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
         # Setup test files
         self.setup_test_files()
 
-        # Create a short dummy file for quick testing
+        # Create a short dummy file for quick testing in the current repo
         dummy_content = """def add(a, b):
     return a + b  # Missing type hints
 
 def divide(x, y):
     return x / y  # No zero check
 """
-        dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content)
+        # Create the file in the current git repo directory to make it show up in git status
+        dummy_file_path = os.path.join(os.getcwd(), "dummy_code.py")
+        with open(dummy_file_path, "w") as f:
+            f.write(dummy_content)
 
         # Get timestamp for log filtering
         import datetime
@@ -162,7 +166,10 @@
 def subtract(a, b):
     return a - b
 """
-        new_file_path = self.create_additional_test_file("new_feature.py", new_file_content)
+        # Create another temp file in the current repo for git changes
+        new_file_path = os.path.join(os.getcwd(), "new_feature.py")
+        with open(new_file_path, "w") as f:
+            f.write(new_file_content)
 
         # Continue precommit with both files
         continue_params = {
@@ -249,4 +256,11 @@
             self.logger.error(f"File deduplication workflow test failed: {e}")
             return False
         finally:
+            # Clean up temp files created in current repo
+            temp_files = ["dummy_code.py", "new_feature.py"]
+            for temp_file in temp_files:
+                temp_path = os.path.join(os.getcwd(), temp_file)
+                if os.path.exists(temp_path):
+                    os.remove(temp_path)
+                    self.logger.debug(f"Removed temp file: {temp_path}")
             self.cleanup_test_files()
diff --git a/tests/mock_helpers.py b/tests/mock_helpers.py
index 447dd5b..0ee4465 100644
--- a/tests/mock_helpers.py
+++ b/tests/mock_helpers.py
@@ -5,7 +5,7 @@
 from unittest.mock import Mock
 from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
 
-def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", max_tokens=1_048_576):
+def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", context_window=1_048_576):
     """Create a properly configured mock provider."""
     mock_provider = Mock()
 
@@ -14,7 +14,7 @@
         provider=ProviderType.GOOGLE,
         model_name=model_name,
         friendly_name="Gemini",
-        max_tokens=max_tokens,
+        context_window=context_window,
         supports_extended_thinking=False,
         supports_system_prompts=True,
         supports_streaming=True,
diff --git a/tests/test_conversation_field_mapping.py b/tests/test_conversation_field_mapping.py
index 28cd82e..014d2e7 100644
--- a/tests/test_conversation_field_mapping.py
+++ b/tests/test_conversation_field_mapping.py
@@ -77,7 +77,7 @@ async def test_conversation_history_field_mapping():
             provider=ProviderType.GOOGLE,
             model_name="gemini-2.5-flash-preview-05-20",
             friendly_name="Gemini",
-            max_tokens=200000,
+            context_window=200000,
             supports_extended_thinking=True,
         )
         mock_get_provider.return_value = mock_provider
diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py
index 600400d..34c9433 100644
--- a/tests/test_openrouter_provider.py
+++ b/tests/test_openrouter_provider.py
@@ -61,7 +61,7 @@ class TestOpenRouterProvider:
         caps = provider.get_capabilities("unknown-model")
         assert caps.provider == ProviderType.OPENROUTER
         assert caps.model_name == "unknown-model"
-        assert caps.max_tokens == 32_768  # Safe default
+        assert caps.context_window == 32_768  # Safe default
         assert hasattr(caps, "_is_generic") and caps._is_generic is True
 
     def test_model_alias_resolution(self):
@@ -139,7 +139,7 @@ class TestOpenRouterRegistry:
         caps = registry.get_capabilities("opus")
         assert caps is not None
         assert caps.model_name == "anthropic/claude-3-opus"
-        assert caps.max_tokens == 200000  # Claude's context window
+        assert caps.context_window == 200000  # Claude's context window
 
         # Test using full model name
         caps = registry.get_capabilities("anthropic/claude-3-opus")
diff --git a/tests/test_openrouter_registry.py b/tests/test_openrouter_registry.py
index 830ca47..0f55449 100644
--- a/tests/test_openrouter_registry.py
+++ b/tests/test_openrouter_registry.py
@@ -120,7 +120,7 @@ class TestOpenRouterModelRegistry:
         assert caps.provider == ProviderType.OPENROUTER
         assert caps.model_name == "anthropic/claude-3-opus"
         assert caps.friendly_name == "OpenRouter"
-        assert caps.max_tokens == 200000
+        assert caps.context_window == 200000
         assert not caps.supports_extended_thinking
 
     def test_duplicate_alias_detection(self):
@@ -147,13 +147,13 @@ class TestOpenRouterModelRegistry:
             os.unlink(temp_path)
 
     def test_backwards_compatibility_max_tokens(self):
-        """Test backwards compatibility with old max_tokens field."""
+        """Test that old max_tokens field is no longer supported (should result in empty registry)."""
         config_data = {
             "models": [
                 {
                     "model_name": "test/old-model",
                     "aliases": ["old"],
-                    "max_tokens": 16384,  # Old field name
+                    "max_tokens": 16384,  # Old field name should cause error
                     "supports_extended_thinking": False,
                 }
             ]
         }
 
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(config_data, f)
             temp_path = f.name
 
         try:
+            # Should gracefully handle the error and result in empty registry
             registry = OpenRouterModelRegistry(config_path=temp_path)
-            config = registry.resolve("old")
-
-            assert config is not None
-            assert config.context_window == 16384  # Should be converted
-
-            # Check capabilities still work
-            caps = config.to_capabilities()
-            assert caps.max_tokens == 16384
+            # Registry should be empty due to config error
+            assert len(registry.list_models()) == 0
+            assert len(registry.list_aliases()) == 0
+            assert registry.resolve("old") is None
         finally:
             os.unlink(temp_path)
@@ -215,7 +212,7 @@ class TestOpenRouterModelRegistry:
         )
 
         caps = config.to_capabilities()
-        assert caps.max_tokens == 128000
+        assert caps.context_window == 128000
         assert caps.supports_extended_thinking
         assert caps.supports_system_prompts
         assert caps.supports_streaming
diff --git a/tests/test_providers.py b/tests/test_providers.py
index f1827f5..5fa019c 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -84,7 +84,7 @@ class TestGeminiProvider:
 
         assert capabilities.provider == ProviderType.GOOGLE
         assert capabilities.model_name == "gemini-2.5-flash-preview-05-20"
-        assert capabilities.max_tokens == 1_048_576
+        assert capabilities.context_window == 1_048_576
         assert capabilities.supports_extended_thinking
 
     def test_get_capabilities_pro_model(self):
@@ -165,7 +165,7 @@ class TestOpenAIProvider:
 
         assert capabilities.provider == ProviderType.OPENAI
         assert capabilities.model_name == "o3-mini"
-        assert capabilities.max_tokens == 200_000
+        assert capabilities.context_window == 200_000
         assert not capabilities.supports_extended_thinking
 
     def test_validate_model_names(self):
diff --git a/tools/precommit.py b/tools/precommit.py
index 23bdb99..23a5a2e 100644
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -14,8 +14,6 @@ from typing import Any, Literal, Optional
 from mcp.types import TextContent
 from pydantic import Field
 
-# Conservative fallback for token limits
-DEFAULT_CONTEXT_WINDOW = 200_000
 from prompts.tool_prompts import PRECOMMIT_PROMPT
 from utils.file_utils import translate_file_paths, translate_path_for_environment
 from utils.git_utils import find_git_repositories, get_git_status, run_git_command
@@ -24,6 +22,9 @@ from utils.token_utils import estimate_tokens
 
 from .base import BaseTool, ToolRequest
 from .models import ToolOutput
 
+# Conservative fallback for token limits
+DEFAULT_CONTEXT_WINDOW = 200_000
+
 
 class PrecommitRequest(ToolRequest):
     """Request model for precommit tool"""
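
Note on the allocation rule these patches introduce: token budgeting is now derived per model from ModelCapabilities.context_window instead of the removed global MAX_CONTEXT_TOKENS. A minimal sketch of the shared logic in tools/base.py and utils/model_context.py follows — the helper name here is illustrative, not part of the patches:

    def calculate_content_budget(context_window: int, reserve_tokens: int = 1_000) -> int:
        """Split a model's context window into a content budget.

        Mirrors the rule in the patches: models under 300K tokens keep 40%
        of the window for the response, larger models keep 20%.
        """
        if context_window < 300_000:
            content_tokens = int(context_window * 0.6)  # 60% content / 40% response
        else:
            content_tokens = int(context_window * 0.8)  # 80% content / 20% response
        # Never return less than a minimal 1K budget, as in tools/base.py
        return max(1_000, content_tokens - reserve_tokens)

    # Example budgets for the models configured above:
    calculate_content_budget(200_000)    # o3 / o3-mini -> 119_000
    calculate_content_budget(1_048_576)  # Gemini 2.5 Flash/Pro -> 837_860

With this rule in place, the fixed DEFAULT_CONTEXT_WINDOW = 200_000 fallback only matters when a model's capabilities cannot be resolved at all.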