Fixed for auto mode

Fahad
2025-06-14 01:16:15 +04:00
parent 14c266a162
commit 7fc1186a7c
2 changed files with 72 additions and 27 deletions


@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "4.2.1"
+__version__ = "4.2.2"
 # Last update date in ISO format
 __updated__ = "2025-06-14"
 # Primary maintainer


@@ -469,36 +469,80 @@ class BaseTool(ABC):
         from config import DEFAULT_MODEL

         model_name = getattr(self, "_current_model_name", None) or DEFAULT_MODEL

-        try:
-            provider = self.get_model_provider(model_name)
-            capabilities = provider.get_capabilities(model_name)
-
-            # Calculate content allocation based on model capacity
-            if capabilities.context_window < 300_000:
-                # Smaller context models: 60% content, 40% response
-                model_content_tokens = int(capabilities.context_window * 0.6)
-            else:
-                # Larger context models: 80% content, 20% response
-                model_content_tokens = int(capabilities.context_window * 0.8)
-
-            effective_max_tokens = model_content_tokens - reserve_tokens
-
-            logger.debug(
-                f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
-                f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
-            )
-        except (ValueError, AttributeError) as e:
-            # Handle specific errors: provider not found, model not supported, missing attributes
-            logger.warning(
-                f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
-            )
-            # Fall back to conservative default for safety
-            effective_max_tokens = 100_000 - reserve_tokens
-        except Exception as e:
-            # Catch any other unexpected errors
-            logger.error(
-                f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
-            )
-            effective_max_tokens = 100_000 - reserve_tokens
+        # Handle auto mode gracefully
+        if model_name.lower() == "auto":
+            from providers.registry import ModelProviderRegistry
+
+            # Use the preferred fallback model for capacity estimation
+            # This properly handles different providers (OpenAI=200K, Gemini=1M)
+            fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
+            logger.debug(
+                f"[FILES] {self.name}: Auto mode detected, using {fallback_model} for capacity estimation"
+            )
+
+            try:
+                provider = self.get_model_provider(fallback_model)
+                capabilities = provider.get_capabilities(fallback_model)
+
+                # Calculate content allocation based on model capacity
+                if capabilities.context_window < 300_000:
+                    # Smaller context models: 60% content, 40% response
+                    model_content_tokens = int(capabilities.context_window * 0.6)
+                else:
+                    # Larger context models: 80% content, 20% response
+                    model_content_tokens = int(capabilities.context_window * 0.8)
+
+                effective_max_tokens = model_content_tokens - reserve_tokens
+                logger.debug(
+                    f"[FILES] {self.name}: Using {fallback_model} capacity for auto mode: "
+                    f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
+                )
+            except (ValueError, AttributeError) as e:
+                # Handle specific errors: provider not found, model not supported, missing attributes
+                logger.warning(
+                    f"[FILES] {self.name}: Could not get capabilities for fallback model {fallback_model}: {type(e).__name__}: {e}"
+                )
+                # Fall back to conservative default for safety
+                effective_max_tokens = 100_000 - reserve_tokens
+            except Exception as e:
+                # Catch any other unexpected errors
+                logger.error(
+                    f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
+                )
+                effective_max_tokens = 100_000 - reserve_tokens
+        else:
+            # Normal mode - use the specified model
+            try:
+                provider = self.get_model_provider(model_name)
+                capabilities = provider.get_capabilities(model_name)
+
+                # Calculate content allocation based on model capacity
+                if capabilities.context_window < 300_000:
+                    # Smaller context models: 60% content, 40% response
+                    model_content_tokens = int(capabilities.context_window * 0.6)
+                else:
+                    # Larger context models: 80% content, 20% response
+                    model_content_tokens = int(capabilities.context_window * 0.8)
+
+                effective_max_tokens = model_content_tokens - reserve_tokens
+                logger.debug(
+                    f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
+                    f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
+                )
+            except (ValueError, AttributeError) as e:
+                # Handle specific errors: provider not found, model not supported, missing attributes
+                logger.warning(
+                    f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
+                )
+                # Fall back to conservative default for safety
+                effective_max_tokens = 100_000 - reserve_tokens
+            except Exception as e:
+                # Catch any other unexpected errors
+                logger.error(
+                    f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
+                )
+                effective_max_tokens = 100_000 - reserve_tokens

         # Ensure we have a reasonable minimum budget
         effective_max_tokens = max(1000, effective_max_tokens)
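
For context, the capacity-allocation rule shared by both branches of this hunk can be read as a small standalone helper. This is only an illustrative sketch, not code from the commit; the function name estimate_content_budget is hypothetical, while the 300_000-token threshold, the 60%/80% splits, and the max(1000, ...) floor all come from the diff above.

def estimate_content_budget(context_window: int, reserve_tokens: int) -> int:
    # Hypothetical restatement of the allocation rule in the hunk above
    if context_window < 300_000:
        # Smaller context models: 60% content, 40% response
        content_tokens = int(context_window * 0.6)
    else:
        # Larger context models: 80% content, 20% response
        content_tokens = int(context_window * 0.8)
    # Same minimum-budget guard as the code above
    return max(1000, content_tokens - reserve_tokens)

For example, a 200K-context model yields int(200_000 * 0.6) = 120_000 content tokens before the reserve is subtracted, while a 1M-context model yields 800_000.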
@@ -866,6 +910,7 @@ When recommending searches, be specific about what information you need and why
 return [TextContent(type="text", text=error_output.model_dump_json())]

 # Store model name for use by helper methods like _prepare_file_content_for_prompt
+# Only set this after auto mode validation to prevent "auto" being used as a model name
 self._current_model_name = model_name

 temperature = getattr(request, "temperature", None)
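
The comment added in this last hunk encodes an ordering constraint: the literal "auto" must be resolved to a concrete model before it is cached on the tool, otherwise helpers such as _prepare_file_content_for_prompt would try to look up capabilities for a model named "auto". A minimal sketch of that guard, assuming a hypothetical resolve_auto_model() helper that is not part of this commit:

def cache_resolved_model(tool, requested_model: str, resolve_auto_model) -> None:
    # Resolve "auto" before caching, so _current_model_name never holds the literal "auto"
    if requested_model.lower() == "auto":
        requested_model = resolve_auto_model()
    tool._current_model_name = requested_model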