Merge remote-tracking branch 'upstream/main' into feat/comprehensive-project-improvements
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -165,9 +165,3 @@ test_simulation_files/.claude/
|
|||||||
|
|
||||||
# Temporary test directories
|
# Temporary test directories
|
||||||
test-setup/
|
test-setup/
|
||||||
/test_simulation_files/**
|
|
||||||
|
|
||||||
# Remove Claude configuration
|
|
||||||
.mcp.json
|
|
||||||
Claude.md
|
|
||||||
memory-bank
|
|
||||||
@@ -47,13 +47,6 @@ MODEL_CAPABILITIES_DESC = {
|
|||||||
# - "o3" → "openai/gpt-4o"
|
# - "o3" → "openai/gpt-4o"
|
||||||
# - "o3-mini" → "openai/gpt-4o-mini"
|
# - "o3-mini" → "openai/gpt-4o-mini"
|
||||||
|
|
||||||
# Token allocation for Gemini Pro (1M total capacity)
|
|
||||||
# MAX_CONTEXT_TOKENS: Total model capacity
|
|
||||||
# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
|
|
||||||
# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
|
|
||||||
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens total capacity for Gemini Pro
|
|
||||||
MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
|
|
||||||
RESPONSE_RESERVE_TOKENS = 200_000 # 200K tokens reserved for response generation
|
|
||||||
|
|
||||||
# Temperature defaults for different tool types
|
# Temperature defaults for different tool types
|
||||||
# Temperature controls the randomness/creativity of model responses
|
# Temperature controls the randomness/creativity of model responses
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ class ModelCapabilities:
|
|||||||
provider: ProviderType
|
provider: ProviderType
|
||||||
model_name: str
|
model_name: str
|
||||||
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
|
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
|
||||||
max_tokens: int
|
context_window: int # Total context window size in tokens
|
||||||
supports_extended_thinking: bool = False
|
supports_extended_thinking: bool = False
|
||||||
supports_system_prompts: bool = True
|
supports_system_prompts: bool = True
|
||||||
supports_streaming: bool = True
|
supports_streaming: bool = True
|
||||||
|
|||||||
@@ -14,12 +14,12 @@ class GeminiModelProvider(ModelProvider):
|
|||||||
# Model configurations
|
# Model configurations
|
||||||
SUPPORTED_MODELS = {
|
SUPPORTED_MODELS = {
|
||||||
"gemini-2.5-flash-preview-05-20": {
|
"gemini-2.5-flash-preview-05-20": {
|
||||||
"max_tokens": 1_048_576, # 1M tokens
|
"context_window": 1_048_576, # 1M tokens
|
||||||
"supports_extended_thinking": True,
|
"supports_extended_thinking": True,
|
||||||
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
|
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
|
||||||
},
|
},
|
||||||
"gemini-2.5-pro-preview-06-05": {
|
"gemini-2.5-pro-preview-06-05": {
|
||||||
"max_tokens": 1_048_576, # 1M tokens
|
"context_window": 1_048_576, # 1M tokens
|
||||||
"supports_extended_thinking": True,
|
"supports_extended_thinking": True,
|
||||||
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
|
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
|
||||||
},
|
},
|
||||||
@@ -68,7 +68,7 @@ class GeminiModelProvider(ModelProvider):
|
|||||||
provider=ProviderType.GOOGLE,
|
provider=ProviderType.GOOGLE,
|
||||||
model_name=resolved_name,
|
model_name=resolved_name,
|
||||||
friendly_name="Gemini",
|
friendly_name="Gemini",
|
||||||
max_tokens=config["max_tokens"],
|
context_window=config["context_window"],
|
||||||
supports_extended_thinking=config["supports_extended_thinking"],
|
supports_extended_thinking=config["supports_extended_thinking"],
|
||||||
supports_system_prompts=True,
|
supports_system_prompts=True,
|
||||||
supports_streaming=True,
|
supports_streaming=True,
|
||||||
|
|||||||
@@ -15,11 +15,11 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
|||||||
# Model configurations
|
# Model configurations
|
||||||
SUPPORTED_MODELS = {
|
SUPPORTED_MODELS = {
|
||||||
"o3": {
|
"o3": {
|
||||||
"max_tokens": 200_000, # 200K tokens
|
"context_window": 200_000, # 200K tokens
|
||||||
"supports_extended_thinking": False,
|
"supports_extended_thinking": False,
|
||||||
},
|
},
|
||||||
"o3-mini": {
|
"o3-mini": {
|
||||||
"max_tokens": 200_000, # 200K tokens
|
"context_window": 200_000, # 200K tokens
|
||||||
"supports_extended_thinking": False,
|
"supports_extended_thinking": False,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -49,7 +49,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
|||||||
provider=ProviderType.OPENAI,
|
provider=ProviderType.OPENAI,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
friendly_name="OpenAI",
|
friendly_name="OpenAI",
|
||||||
max_tokens=config["max_tokens"],
|
context_window=config["context_window"],
|
||||||
supports_extended_thinking=config["supports_extended_thinking"],
|
supports_extended_thinking=config["supports_extended_thinking"],
|
||||||
supports_system_prompts=True,
|
supports_system_prompts=True,
|
||||||
supports_streaming=True,
|
supports_streaming=True,
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
|||||||
provider=ProviderType.OPENROUTER,
|
provider=ProviderType.OPENROUTER,
|
||||||
model_name=resolved_name,
|
model_name=resolved_name,
|
||||||
friendly_name=self.FRIENDLY_NAME,
|
friendly_name=self.FRIENDLY_NAME,
|
||||||
max_tokens=32_768, # Conservative default context window
|
context_window=32_768, # Conservative default context window
|
||||||
supports_extended_thinking=False,
|
supports_extended_thinking=False,
|
||||||
supports_system_prompts=True,
|
supports_system_prompts=True,
|
||||||
supports_streaming=True,
|
supports_streaming=True,
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ class OpenRouterModelConfig:
|
|||||||
provider=ProviderType.OPENROUTER,
|
provider=ProviderType.OPENROUTER,
|
||||||
model_name=self.model_name,
|
model_name=self.model_name,
|
||||||
friendly_name="OpenRouter",
|
friendly_name="OpenRouter",
|
||||||
max_tokens=self.context_window, # ModelCapabilities still uses max_tokens
|
context_window=self.context_window,
|
||||||
supports_extended_thinking=self.supports_extended_thinking,
|
supports_extended_thinking=self.supports_extended_thinking,
|
||||||
supports_system_prompts=self.supports_system_prompts,
|
supports_system_prompts=self.supports_system_prompts,
|
||||||
supports_streaming=self.supports_streaming,
|
supports_streaming=self.supports_streaming,
|
||||||
@@ -103,10 +103,6 @@ class OpenRouterModelRegistry:
|
|||||||
# Parse models
|
# Parse models
|
||||||
configs = []
|
configs = []
|
||||||
for model_data in data.get("models", []):
|
for model_data in data.get("models", []):
|
||||||
# Handle backwards compatibility - rename max_tokens to context_window
|
|
||||||
if "max_tokens" in model_data and "context_window" not in model_data:
|
|
||||||
model_data["context_window"] = model_data.pop("max_tokens")
|
|
||||||
|
|
||||||
config = OpenRouterModelConfig(**model_data)
|
config = OpenRouterModelConfig(**model_data)
|
||||||
configs.append(config)
|
configs.append(config)
|
||||||
|
|
||||||
|
|||||||
23
server.py
23
server.py
@@ -33,7 +33,6 @@ from mcp.types import ServerCapabilities, TextContent, Tool, ToolsCapability
|
|||||||
|
|
||||||
from config import (
|
from config import (
|
||||||
DEFAULT_MODEL,
|
DEFAULT_MODEL,
|
||||||
MAX_CONTEXT_TOKENS,
|
|
||||||
__author__,
|
__author__,
|
||||||
__updated__,
|
__updated__,
|
||||||
__version__,
|
__version__,
|
||||||
@@ -158,24 +157,6 @@ def configure_providers():
|
|||||||
has_openrouter = True
|
has_openrouter = True
|
||||||
logger.info("OpenRouter API key found - Multiple models available via OpenRouter")
|
logger.info("OpenRouter API key found - Multiple models available via OpenRouter")
|
||||||
|
|
||||||
# Check for conflicting configuration
|
|
||||||
if has_native_apis and has_openrouter:
|
|
||||||
logger.warning(
|
|
||||||
"\n" + "=" * 70 + "\n"
|
|
||||||
"WARNING: Both OpenRouter and native API keys detected!\n"
|
|
||||||
"\n"
|
|
||||||
"This creates ambiguity about which provider will be used for models\n"
|
|
||||||
"available through both APIs (e.g., 'o3' could come from OpenAI or OpenRouter).\n"
|
|
||||||
"\n"
|
|
||||||
"RECOMMENDATION: Use EITHER OpenRouter OR native APIs, not both.\n"
|
|
||||||
"\n"
|
|
||||||
"To fix this:\n"
|
|
||||||
"1. Use only OpenRouter: unset GEMINI_API_KEY and OPENAI_API_KEY\n"
|
|
||||||
"2. Use only native APIs: unset OPENROUTER_API_KEY\n"
|
|
||||||
"\n"
|
|
||||||
"Current configuration will prioritize native APIs over OpenRouter.\n" + "=" * 70 + "\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Register providers - native APIs first to ensure they take priority
|
# Register providers - native APIs first to ensure they take priority
|
||||||
if has_native_apis:
|
if has_native_apis:
|
||||||
if gemini_key and gemini_key != "your_gemini_api_key_here":
|
if gemini_key and gemini_key != "your_gemini_api_key_here":
|
||||||
@@ -539,7 +520,7 @@ async def handle_get_version() -> list[TextContent]:
|
|||||||
"author": __author__,
|
"author": __author__,
|
||||||
"default_model": DEFAULT_MODEL,
|
"default_model": DEFAULT_MODEL,
|
||||||
"default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
|
"default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
|
||||||
"max_context_tokens": f"{MAX_CONTEXT_TOKENS:,}",
|
"max_context_tokens": "Dynamic (model-specific)",
|
||||||
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
|
||||||
"server_started": datetime.now().isoformat(),
|
"server_started": datetime.now().isoformat(),
|
||||||
"available_tools": list(TOOLS.keys()) + ["get_version"],
|
"available_tools": list(TOOLS.keys()) + ["get_version"],
|
||||||
@@ -565,7 +546,7 @@ Author: {__author__}
|
|||||||
Configuration:
|
Configuration:
|
||||||
- Default Model: {DEFAULT_MODEL}
|
- Default Model: {DEFAULT_MODEL}
|
||||||
- Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
|
- Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
|
||||||
- Max Context: {MAX_CONTEXT_TOKENS:,} tokens
|
- Max Context: Dynamic (model-specific)
|
||||||
- Python: {version_info["python_version"]}
|
- Python: {version_info["python_version"]}
|
||||||
- Started: {version_info["server_started"]}
|
- Started: {version_info["server_started"]}
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ Validates that:
|
|||||||
4. Docker logs show deduplication behavior
|
4. Docker logs show deduplication behavior
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
@@ -98,14 +99,17 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
# Setup test files
|
# Setup test files
|
||||||
self.setup_test_files()
|
self.setup_test_files()
|
||||||
|
|
||||||
# Create a short dummy file for quick testing
|
# Create a short dummy file for quick testing in the current repo
|
||||||
dummy_content = """def add(a, b):
|
dummy_content = """def add(a, b):
|
||||||
return a + b # Missing type hints
|
return a + b # Missing type hints
|
||||||
|
|
||||||
def divide(x, y):
|
def divide(x, y):
|
||||||
return x / y # No zero check
|
return x / y # No zero check
|
||||||
"""
|
"""
|
||||||
dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content)
|
# Create the file in the current git repo directory to make it show up in git status
|
||||||
|
dummy_file_path = os.path.join(os.getcwd(), "dummy_code.py")
|
||||||
|
with open(dummy_file_path, "w") as f:
|
||||||
|
f.write(dummy_content)
|
||||||
|
|
||||||
# Get timestamp for log filtering
|
# Get timestamp for log filtering
|
||||||
import datetime
|
import datetime
|
||||||
@@ -162,7 +166,10 @@ def divide(x, y):
|
|||||||
def subtract(a, b):
|
def subtract(a, b):
|
||||||
return a - b
|
return a - b
|
||||||
"""
|
"""
|
||||||
new_file_path = self.create_additional_test_file("new_feature.py", new_file_content)
|
# Create another temp file in the current repo for git changes
|
||||||
|
new_file_path = os.path.join(os.getcwd(), "new_feature.py")
|
||||||
|
with open(new_file_path, "w") as f:
|
||||||
|
f.write(new_file_content)
|
||||||
|
|
||||||
# Continue precommit with both files
|
# Continue precommit with both files
|
||||||
continue_params = {
|
continue_params = {
|
||||||
@@ -249,4 +256,11 @@ def subtract(a, b):
|
|||||||
self.logger.error(f"File deduplication workflow test failed: {e}")
|
self.logger.error(f"File deduplication workflow test failed: {e}")
|
||||||
return False
|
return False
|
||||||
finally:
|
finally:
|
||||||
|
# Clean up temp files created in current repo
|
||||||
|
temp_files = ["dummy_code.py", "new_feature.py"]
|
||||||
|
for temp_file in temp_files:
|
||||||
|
temp_path = os.path.join(os.getcwd(), temp_file)
|
||||||
|
if os.path.exists(temp_path):
|
||||||
|
os.remove(temp_path)
|
||||||
|
self.logger.debug(f"Removed temp file: {temp_path}")
|
||||||
self.cleanup_test_files()
|
self.cleanup_test_files()
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from unittest.mock import Mock
|
|||||||
from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
|
from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
|
||||||
|
|
||||||
|
|
||||||
def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", max_tokens=1_048_576):
|
def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", context_window=1_048_576):
|
||||||
"""Create a properly configured mock provider."""
|
"""Create a properly configured mock provider."""
|
||||||
mock_provider = Mock()
|
mock_provider = Mock()
|
||||||
|
|
||||||
@@ -14,7 +14,7 @@ def create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", max_tokens
|
|||||||
provider=ProviderType.GOOGLE,
|
provider=ProviderType.GOOGLE,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
friendly_name="Gemini",
|
friendly_name="Gemini",
|
||||||
max_tokens=max_tokens,
|
context_window=context_window,
|
||||||
supports_extended_thinking=False,
|
supports_extended_thinking=False,
|
||||||
supports_system_prompts=True,
|
supports_system_prompts=True,
|
||||||
supports_streaming=True,
|
supports_streaming=True,
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ Tests for configuration
|
|||||||
|
|
||||||
from config import (
|
from config import (
|
||||||
DEFAULT_MODEL,
|
DEFAULT_MODEL,
|
||||||
MAX_CONTEXT_TOKENS,
|
|
||||||
TEMPERATURE_ANALYTICAL,
|
TEMPERATURE_ANALYTICAL,
|
||||||
TEMPERATURE_BALANCED,
|
TEMPERATURE_BALANCED,
|
||||||
TEMPERATURE_CREATIVE,
|
TEMPERATURE_CREATIVE,
|
||||||
@@ -33,7 +32,6 @@ class TestConfig:
|
|||||||
"""Test model configuration"""
|
"""Test model configuration"""
|
||||||
# DEFAULT_MODEL is set in conftest.py for tests
|
# DEFAULT_MODEL is set in conftest.py for tests
|
||||||
assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20"
|
assert DEFAULT_MODEL == "gemini-2.5-flash-preview-05-20"
|
||||||
assert MAX_CONTEXT_TOKENS == 1_000_000
|
|
||||||
|
|
||||||
def test_temperature_defaults(self):
|
def test_temperature_defaults(self):
|
||||||
"""Test temperature constants"""
|
"""Test temperature constants"""
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ async def test_conversation_history_field_mapping():
|
|||||||
provider=ProviderType.GOOGLE,
|
provider=ProviderType.GOOGLE,
|
||||||
model_name="gemini-2.5-flash-preview-05-20",
|
model_name="gemini-2.5-flash-preview-05-20",
|
||||||
friendly_name="Gemini",
|
friendly_name="Gemini",
|
||||||
max_tokens=200000,
|
context_window=200000,
|
||||||
supports_extended_thinking=True,
|
supports_extended_thinking=True,
|
||||||
)
|
)
|
||||||
mock_get_provider.return_value = mock_provider
|
mock_get_provider.return_value = mock_provider
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ class TestOpenRouterProvider:
|
|||||||
caps = provider.get_capabilities("unknown-model")
|
caps = provider.get_capabilities("unknown-model")
|
||||||
assert caps.provider == ProviderType.OPENROUTER
|
assert caps.provider == ProviderType.OPENROUTER
|
||||||
assert caps.model_name == "unknown-model"
|
assert caps.model_name == "unknown-model"
|
||||||
assert caps.max_tokens == 32_768 # Safe default
|
assert caps.context_window == 32_768 # Safe default
|
||||||
assert hasattr(caps, "_is_generic") and caps._is_generic is True
|
assert hasattr(caps, "_is_generic") and caps._is_generic is True
|
||||||
|
|
||||||
def test_model_alias_resolution(self):
|
def test_model_alias_resolution(self):
|
||||||
@@ -139,7 +139,7 @@ class TestOpenRouterRegistry:
|
|||||||
caps = registry.get_capabilities("opus")
|
caps = registry.get_capabilities("opus")
|
||||||
assert caps is not None
|
assert caps is not None
|
||||||
assert caps.model_name == "anthropic/claude-3-opus"
|
assert caps.model_name == "anthropic/claude-3-opus"
|
||||||
assert caps.max_tokens == 200000 # Claude's context window
|
assert caps.context_window == 200000 # Claude's context window
|
||||||
|
|
||||||
# Test using full model name
|
# Test using full model name
|
||||||
caps = registry.get_capabilities("anthropic/claude-3-opus")
|
caps = registry.get_capabilities("anthropic/claude-3-opus")
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ class TestOpenRouterModelRegistry:
|
|||||||
assert caps.provider == ProviderType.OPENROUTER
|
assert caps.provider == ProviderType.OPENROUTER
|
||||||
assert caps.model_name == "anthropic/claude-3-opus"
|
assert caps.model_name == "anthropic/claude-3-opus"
|
||||||
assert caps.friendly_name == "OpenRouter"
|
assert caps.friendly_name == "OpenRouter"
|
||||||
assert caps.max_tokens == 200000
|
assert caps.context_window == 200000
|
||||||
assert not caps.supports_extended_thinking
|
assert not caps.supports_extended_thinking
|
||||||
|
|
||||||
def test_duplicate_alias_detection(self):
|
def test_duplicate_alias_detection(self):
|
||||||
@@ -147,13 +147,13 @@ class TestOpenRouterModelRegistry:
|
|||||||
os.unlink(temp_path)
|
os.unlink(temp_path)
|
||||||
|
|
||||||
def test_backwards_compatibility_max_tokens(self):
|
def test_backwards_compatibility_max_tokens(self):
|
||||||
"""Test backwards compatibility with old max_tokens field."""
|
"""Test that old max_tokens field is no longer supported (should result in empty registry)."""
|
||||||
config_data = {
|
config_data = {
|
||||||
"models": [
|
"models": [
|
||||||
{
|
{
|
||||||
"model_name": "test/old-model",
|
"model_name": "test/old-model",
|
||||||
"aliases": ["old"],
|
"aliases": ["old"],
|
||||||
"max_tokens": 16384, # Old field name
|
"max_tokens": 16384, # Old field name should cause error
|
||||||
"supports_extended_thinking": False,
|
"supports_extended_thinking": False,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -164,15 +164,12 @@ class TestOpenRouterModelRegistry:
|
|||||||
temp_path = f.name
|
temp_path = f.name
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Should gracefully handle the error and result in empty registry
|
||||||
registry = OpenRouterModelRegistry(config_path=temp_path)
|
registry = OpenRouterModelRegistry(config_path=temp_path)
|
||||||
config = registry.resolve("old")
|
# Registry should be empty due to config error
|
||||||
|
assert len(registry.list_models()) == 0
|
||||||
assert config is not None
|
assert len(registry.list_aliases()) == 0
|
||||||
assert config.context_window == 16384 # Should be converted
|
assert registry.resolve("old") is None
|
||||||
|
|
||||||
# Check capabilities still work
|
|
||||||
caps = config.to_capabilities()
|
|
||||||
assert caps.max_tokens == 16384
|
|
||||||
finally:
|
finally:
|
||||||
os.unlink(temp_path)
|
os.unlink(temp_path)
|
||||||
|
|
||||||
@@ -215,7 +212,7 @@ class TestOpenRouterModelRegistry:
|
|||||||
)
|
)
|
||||||
|
|
||||||
caps = config.to_capabilities()
|
caps = config.to_capabilities()
|
||||||
assert caps.max_tokens == 128000
|
assert caps.context_window == 128000
|
||||||
assert caps.supports_extended_thinking
|
assert caps.supports_extended_thinking
|
||||||
assert caps.supports_system_prompts
|
assert caps.supports_system_prompts
|
||||||
assert caps.supports_streaming
|
assert caps.supports_streaming
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ class TestGeminiProvider:
|
|||||||
|
|
||||||
assert capabilities.provider == ProviderType.GOOGLE
|
assert capabilities.provider == ProviderType.GOOGLE
|
||||||
assert capabilities.model_name == "gemini-2.5-flash-preview-05-20"
|
assert capabilities.model_name == "gemini-2.5-flash-preview-05-20"
|
||||||
assert capabilities.max_tokens == 1_048_576
|
assert capabilities.context_window == 1_048_576
|
||||||
assert capabilities.supports_extended_thinking
|
assert capabilities.supports_extended_thinking
|
||||||
|
|
||||||
def test_get_capabilities_pro_model(self):
|
def test_get_capabilities_pro_model(self):
|
||||||
@@ -165,7 +165,7 @@ class TestOpenAIProvider:
|
|||||||
|
|
||||||
assert capabilities.provider == ProviderType.OPENAI
|
assert capabilities.provider == ProviderType.OPENAI
|
||||||
assert capabilities.model_name == "o3-mini"
|
assert capabilities.model_name == "o3-mini"
|
||||||
assert capabilities.max_tokens == 200_000
|
assert capabilities.context_window == 200_000
|
||||||
assert not capabilities.supports_extended_thinking
|
assert not capabilities.supports_extended_thinking
|
||||||
|
|
||||||
def test_validate_model_names(self):
|
def test_validate_model_names(self):
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from typing import Any, Literal, Optional
|
|||||||
from mcp.types import TextContent
|
from mcp.types import TextContent
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from config import MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
|
from config import MCP_PROMPT_SIZE_LIMIT
|
||||||
from providers import ModelProvider, ModelProviderRegistry
|
from providers import ModelProvider, ModelProviderRegistry
|
||||||
from utils import check_token_limit
|
from utils import check_token_limit
|
||||||
from utils.conversation_memory import (
|
from utils.conversation_memory import (
|
||||||
@@ -414,7 +414,7 @@ class BaseTool(ABC):
|
|||||||
request_files: List of files requested for current tool execution
|
request_files: List of files requested for current tool execution
|
||||||
continuation_id: Thread continuation ID, or None for new conversations
|
continuation_id: Thread continuation ID, or None for new conversations
|
||||||
context_description: Description for token limit validation (e.g. "Code", "New files")
|
context_description: Description for token limit validation (e.g. "Code", "New files")
|
||||||
max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS)
|
max_tokens: Maximum tokens to use (defaults to remaining budget or model-specific content allocation)
|
||||||
reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
|
reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
|
||||||
remaining_budget: Remaining token budget after conversation history (from server.py)
|
remaining_budget: Remaining token budget after conversation history (from server.py)
|
||||||
arguments: Original tool arguments (used to extract _remaining_tokens if available)
|
arguments: Original tool arguments (used to extract _remaining_tokens if available)
|
||||||
@@ -473,17 +473,17 @@ class BaseTool(ABC):
|
|||||||
capabilities = provider.get_capabilities(model_name)
|
capabilities = provider.get_capabilities(model_name)
|
||||||
|
|
||||||
# Calculate content allocation based on model capacity
|
# Calculate content allocation based on model capacity
|
||||||
if capabilities.max_tokens < 300_000:
|
if capabilities.context_window < 300_000:
|
||||||
# Smaller context models: 60% content, 40% response
|
# Smaller context models: 60% content, 40% response
|
||||||
model_content_tokens = int(capabilities.max_tokens * 0.6)
|
model_content_tokens = int(capabilities.context_window * 0.6)
|
||||||
else:
|
else:
|
||||||
# Larger context models: 80% content, 20% response
|
# Larger context models: 80% content, 20% response
|
||||||
model_content_tokens = int(capabilities.max_tokens * 0.8)
|
model_content_tokens = int(capabilities.context_window * 0.8)
|
||||||
|
|
||||||
effective_max_tokens = model_content_tokens - reserve_tokens
|
effective_max_tokens = model_content_tokens - reserve_tokens
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
|
f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
|
||||||
f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total"
|
f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
|
||||||
)
|
)
|
||||||
except (ValueError, AttributeError) as e:
|
except (ValueError, AttributeError) as e:
|
||||||
# Handle specific errors: provider not found, model not supported, missing attributes
|
# Handle specific errors: provider not found, model not supported, missing attributes
|
||||||
@@ -491,17 +491,13 @@ class BaseTool(ABC):
|
|||||||
f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
|
f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
|
||||||
)
|
)
|
||||||
# Fall back to conservative default for safety
|
# Fall back to conservative default for safety
|
||||||
from config import MAX_CONTENT_TOKENS
|
effective_max_tokens = 100_000 - reserve_tokens
|
||||||
|
|
||||||
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Catch any other unexpected errors
|
# Catch any other unexpected errors
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
|
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
|
||||||
)
|
)
|
||||||
from config import MAX_CONTENT_TOKENS
|
effective_max_tokens = 100_000 - reserve_tokens
|
||||||
|
|
||||||
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
|
|
||||||
|
|
||||||
# Ensure we have a reasonable minimum budget
|
# Ensure we have a reasonable minimum budget
|
||||||
effective_max_tokens = max(1000, effective_max_tokens)
|
effective_max_tokens = max(1000, effective_max_tokens)
|
||||||
@@ -1233,7 +1229,7 @@ When recommending searches, be specific about what information you need and why
|
|||||||
"""
|
"""
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _validate_token_limit(self, text: str, context_type: str = "Context") -> None:
|
def _validate_token_limit(self, text: str, context_type: str = "Context", context_window: int = 200_000) -> None:
|
||||||
"""
|
"""
|
||||||
Validate token limit and raise ValueError if exceeded.
|
Validate token limit and raise ValueError if exceeded.
|
||||||
|
|
||||||
@@ -1243,14 +1239,15 @@ When recommending searches, be specific about what information you need and why
|
|||||||
Args:
|
Args:
|
||||||
text: The text to check
|
text: The text to check
|
||||||
context_type: Description of what's being checked (for error message)
|
context_type: Description of what's being checked (for error message)
|
||||||
|
context_window: The model's context window size
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If text exceeds MAX_CONTEXT_TOKENS
|
ValueError: If text exceeds context_window
|
||||||
"""
|
"""
|
||||||
within_limit, estimated_tokens = check_token_limit(text)
|
within_limit, estimated_tokens = check_token_limit(text, context_window)
|
||||||
if not within_limit:
|
if not within_limit:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {MAX_CONTEXT_TOKENS:,} tokens."
|
f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {context_window:,} tokens."
|
||||||
)
|
)
|
||||||
|
|
||||||
def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]:
|
def _validate_and_correct_temperature(self, model_name: str, temperature: float) -> tuple[float, list[str]]:
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from typing import Any, Literal, Optional
|
|||||||
from mcp.types import TextContent
|
from mcp.types import TextContent
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
|
||||||
from config import MAX_CONTEXT_TOKENS
|
|
||||||
from prompts.tool_prompts import PRECOMMIT_PROMPT
|
from prompts.tool_prompts import PRECOMMIT_PROMPT
|
||||||
from utils.file_utils import translate_file_paths, translate_path_for_environment
|
from utils.file_utils import translate_file_paths, translate_path_for_environment
|
||||||
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
|
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
|
||||||
@@ -23,6 +22,9 @@ from utils.token_utils import estimate_tokens
|
|||||||
from .base import BaseTool, ToolRequest
|
from .base import BaseTool, ToolRequest
|
||||||
from .models import ToolOutput
|
from .models import ToolOutput
|
||||||
|
|
||||||
|
# Conservative fallback for token limits
|
||||||
|
DEFAULT_CONTEXT_WINDOW = 200_000
|
||||||
|
|
||||||
|
|
||||||
class PrecommitRequest(ToolRequest):
|
class PrecommitRequest(ToolRequest):
|
||||||
"""Request model for precommit tool"""
|
"""Request model for precommit tool"""
|
||||||
@@ -186,7 +188,7 @@ class Precommit(BaseTool):
|
|||||||
all_diffs = []
|
all_diffs = []
|
||||||
repo_summaries = []
|
repo_summaries = []
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
max_tokens = MAX_CONTEXT_TOKENS - 50000 # Reserve tokens for prompt and response
|
max_tokens = DEFAULT_CONTEXT_WINDOW - 50000 # Reserve tokens for prompt and response
|
||||||
|
|
||||||
for repo_path in repositories:
|
for repo_path in repositories:
|
||||||
repo_name = os.path.basename(repo_path) or "root"
|
repo_name = os.path.basename(repo_path) or "root"
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from .token_utils import MAX_CONTEXT_TOKENS, estimate_tokens
|
from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -508,14 +508,14 @@ def read_files(
|
|||||||
Args:
|
Args:
|
||||||
file_paths: List of file or directory paths (absolute paths required)
|
file_paths: List of file or directory paths (absolute paths required)
|
||||||
code: Optional direct code to include (prioritized over files)
|
code: Optional direct code to include (prioritized over files)
|
||||||
max_tokens: Maximum tokens to use (defaults to MAX_CONTEXT_TOKENS)
|
max_tokens: Maximum tokens to use (defaults to DEFAULT_CONTEXT_WINDOW)
|
||||||
reserve_tokens: Tokens to reserve for prompt and response (default 50K)
|
reserve_tokens: Tokens to reserve for prompt and response (default 50K)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: All file contents formatted for AI consumption
|
str: All file contents formatted for AI consumption
|
||||||
"""
|
"""
|
||||||
if max_tokens is None:
|
if max_tokens is None:
|
||||||
max_tokens = MAX_CONTEXT_TOKENS
|
max_tokens = DEFAULT_CONTEXT_WINDOW
|
||||||
|
|
||||||
logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
|
logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ class ModelContext:
|
|||||||
Returns:
|
Returns:
|
||||||
TokenAllocation with calculated budgets
|
TokenAllocation with calculated budgets
|
||||||
"""
|
"""
|
||||||
total_tokens = self.capabilities.max_tokens
|
total_tokens = self.capabilities.context_window
|
||||||
|
|
||||||
# Dynamic allocation based on model capacity
|
# Dynamic allocation based on model capacity
|
||||||
if total_tokens < 300_000:
|
if total_tokens < 300_000:
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ approximate. For production systems requiring precise token counts,
|
|||||||
consider using the actual tokenizer for the specific model.
|
consider using the actual tokenizer for the specific model.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from config import MAX_CONTEXT_TOKENS
|
# Default fallback for token limit (conservative estimate)
|
||||||
|
DEFAULT_CONTEXT_WINDOW = 200_000 # Conservative fallback for unknown models
|
||||||
|
|
||||||
|
|
||||||
def estimate_tokens(text: str) -> int:
|
def estimate_tokens(text: str) -> int:
|
||||||
@@ -32,9 +33,9 @@ def estimate_tokens(text: str) -> int:
|
|||||||
return len(text) // 4
|
return len(text) // 4
|
||||||
|
|
||||||
|
|
||||||
def check_token_limit(text: str) -> tuple[bool, int]:
|
def check_token_limit(text: str, context_window: int = DEFAULT_CONTEXT_WINDOW) -> tuple[bool, int]:
|
||||||
"""
|
"""
|
||||||
Check if text exceeds the maximum token limit for Gemini models.
|
Check if text exceeds the specified token limit.
|
||||||
|
|
||||||
This function is used to validate that prepared prompts will fit
|
This function is used to validate that prepared prompts will fit
|
||||||
within the model's context window, preventing API errors and ensuring
|
within the model's context window, preventing API errors and ensuring
|
||||||
@@ -42,11 +43,12 @@ def check_token_limit(text: str) -> tuple[bool, int]:
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: The text to check
|
text: The text to check
|
||||||
|
context_window: The model's context window size (defaults to conservative fallback)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[bool, int]: (is_within_limit, estimated_tokens)
|
Tuple[bool, int]: (is_within_limit, estimated_tokens)
|
||||||
- is_within_limit: True if the text fits within MAX_CONTEXT_TOKENS
|
- is_within_limit: True if the text fits within context_window
|
||||||
- estimated_tokens: The estimated token count
|
- estimated_tokens: The estimated token count
|
||||||
"""
|
"""
|
||||||
estimated = estimate_tokens(text)
|
estimated = estimate_tokens(text)
|
||||||
return estimated <= MAX_CONTEXT_TOKENS, estimated
|
return estimated <= context_window, estimated
|
||||||
|
|||||||
Reference in New Issue
Block a user