Vision support via images / pdfs etc that can be passed on to other models as part of analysis, additional context etc.
Image processing pipeline added OpenAI GPT-4.1 support Chat tool prompt enhancement Lint and code quality improvements
This commit is contained in:
160
tools/base.py
160
tools/base.py
@@ -27,6 +27,7 @@ if TYPE_CHECKING:
|
||||
|
||||
from config import MCP_PROMPT_SIZE_LIMIT
|
||||
from providers import ModelProvider, ModelProviderRegistry
|
||||
from providers.base import ProviderType
|
||||
from utils import check_token_limit
|
||||
from utils.conversation_memory import (
|
||||
MAX_CONVERSATION_TURNS,
|
||||
@@ -84,6 +85,17 @@ class ToolRequest(BaseModel):
|
||||
"additional findings, or answers to follow-up questions. Can be used across different tools."
|
||||
),
|
||||
)
|
||||
images: Optional[list[str]] = Field(
|
||||
None,
|
||||
description=(
|
||||
"Optional image(s) for visual context. Accepts absolute file paths or "
|
||||
"base64 data URLs. Only provide when user explicitly mentions images. "
|
||||
"When including images, please describe what you believe each image contains "
|
||||
"(e.g., 'screenshot of error dialog', 'architecture diagram', 'code snippet') "
|
||||
"to aid with contextual understanding. Useful for UI discussions, diagrams, "
|
||||
"visual problems, error screens, architecture mockups, and visual analysis tasks."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class BaseTool(ABC):
|
||||
@@ -981,6 +993,139 @@ When recommending searches, be specific about what information you need and why
|
||||
}
|
||||
return None
|
||||
|
||||
def _validate_image_limits(
|
||||
self, images: Optional[list[str]], model_name: str, continuation_id: Optional[str] = None
|
||||
) -> Optional[dict]:
|
||||
"""
|
||||
Validate image size against model capabilities at MCP boundary.
|
||||
|
||||
This performs strict validation to ensure we don't exceed model-specific
|
||||
image size limits. Uses capability-based validation with actual model
|
||||
configuration rather than hard-coded limits.
|
||||
|
||||
Args:
|
||||
images: List of image paths/data URLs to validate
|
||||
model_name: Name of the model to check limits against
|
||||
|
||||
Returns:
|
||||
Optional[dict]: Error response if validation fails, None if valid
|
||||
"""
|
||||
if not images:
|
||||
return None
|
||||
|
||||
# Get model capabilities to check image support and size limits
|
||||
try:
|
||||
provider = self.get_model_provider(model_name)
|
||||
capabilities = provider.get_capabilities(model_name)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get capabilities for model {model_name}: {e}")
|
||||
# Fall back to checking custom models configuration
|
||||
capabilities = None
|
||||
|
||||
# Check if model supports images at all
|
||||
supports_images = False
|
||||
max_size_mb = 0.0
|
||||
|
||||
if capabilities:
|
||||
supports_images = capabilities.supports_images
|
||||
max_size_mb = capabilities.max_image_size_mb
|
||||
else:
|
||||
# Fall back to custom models configuration
|
||||
try:
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
custom_models_path = Path(__file__).parent.parent / "conf" / "custom_models.json"
|
||||
if custom_models_path.exists():
|
||||
with open(custom_models_path) as f:
|
||||
custom_config = json.load(f)
|
||||
|
||||
# Check if model is in custom models list
|
||||
for model_config in custom_config.get("models", []):
|
||||
if model_config.get("model_name") == model_name or model_name in model_config.get(
|
||||
"aliases", []
|
||||
):
|
||||
supports_images = model_config.get("supports_images", False)
|
||||
max_size_mb = model_config.get("max_image_size_mb", 0.0)
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load custom models config: {e}")
|
||||
|
||||
# If model doesn't support images, reject
|
||||
if not supports_images:
|
||||
return {
|
||||
"status": "error",
|
||||
"content": (
|
||||
f"Image support not available: Model '{model_name}' does not support image processing. "
|
||||
f"Please use a vision-capable model such as 'gemini-2.5-flash-preview-05-20', 'o3', "
|
||||
f"or 'claude-3-opus' for image analysis tasks."
|
||||
),
|
||||
"content_type": "text",
|
||||
"metadata": {
|
||||
"error_type": "validation_error",
|
||||
"model_name": model_name,
|
||||
"supports_images": False,
|
||||
"image_count": len(images),
|
||||
},
|
||||
}
|
||||
|
||||
# Calculate total size of all images
|
||||
total_size_mb = 0.0
|
||||
for image_path in images:
|
||||
try:
|
||||
if image_path.startswith("...
|
||||
_, data = image_path.split(",", 1)
|
||||
# Base64 encoding increases size by ~33%, so decode to get actual size
|
||||
import base64
|
||||
|
||||
actual_size = len(base64.b64decode(data))
|
||||
total_size_mb += actual_size / (1024 * 1024)
|
||||
else:
|
||||
# Handle file path
|
||||
if os.path.exists(image_path):
|
||||
file_size = os.path.getsize(image_path)
|
||||
total_size_mb += file_size / (1024 * 1024)
|
||||
else:
|
||||
logger.warning(f"Image file not found: {image_path}")
|
||||
# Assume a reasonable size for missing files to avoid breaking validation
|
||||
total_size_mb += 1.0 # 1MB assumption
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get size for image {image_path}: {e}")
|
||||
# Assume a reasonable size for problematic files
|
||||
total_size_mb += 1.0 # 1MB assumption
|
||||
|
||||
# Apply 40MB cap for custom models as requested
|
||||
effective_limit_mb = max_size_mb
|
||||
if hasattr(capabilities, "provider") and capabilities.provider == ProviderType.CUSTOM:
|
||||
effective_limit_mb = min(max_size_mb, 40.0)
|
||||
elif not capabilities: # Fallback case for custom models
|
||||
effective_limit_mb = min(max_size_mb, 40.0)
|
||||
|
||||
# Validate against size limit
|
||||
if total_size_mb > effective_limit_mb:
|
||||
return {
|
||||
"status": "error",
|
||||
"content": (
|
||||
f"Image size limit exceeded: Model '{model_name}' supports maximum {effective_limit_mb:.1f}MB "
|
||||
f"for all images combined, but {total_size_mb:.1f}MB was provided. "
|
||||
f"Please reduce image sizes or count and try again."
|
||||
),
|
||||
"content_type": "text",
|
||||
"metadata": {
|
||||
"error_type": "validation_error",
|
||||
"model_name": model_name,
|
||||
"total_size_mb": round(total_size_mb, 2),
|
||||
"limit_mb": round(effective_limit_mb, 2),
|
||||
"image_count": len(images),
|
||||
"supports_images": supports_images,
|
||||
},
|
||||
}
|
||||
|
||||
# All validations passed
|
||||
logger.debug(f"Image validation passed: {len(images)} images")
|
||||
return None
|
||||
|
||||
def estimate_tokens_smart(self, file_path: str) -> int:
|
||||
"""
|
||||
Estimate tokens for a file using file-type aware ratios.
|
||||
@@ -1131,6 +1276,9 @@ When recommending searches, be specific about what information you need and why
|
||||
)
|
||||
return [TextContent(type="text", text=error_output.model_dump_json())]
|
||||
|
||||
# Extract and validate images from request
|
||||
images = getattr(request, "images", None) or []
|
||||
|
||||
# Check if we have continuation_id - if so, conversation history is already embedded
|
||||
continuation_id = getattr(request, "continuation_id", None)
|
||||
|
||||
@@ -1215,6 +1363,12 @@ When recommending searches, be specific about what information you need and why
|
||||
# Only set this after auto mode validation to prevent "auto" being used as a model name
|
||||
self._current_model_name = model_name
|
||||
|
||||
# Validate images at MCP boundary if any were provided
|
||||
if images:
|
||||
image_validation_error = self._validate_image_limits(images, model_name, continuation_id)
|
||||
if image_validation_error:
|
||||
return [TextContent(type="text", text=json.dumps(image_validation_error))]
|
||||
|
||||
temperature = getattr(request, "temperature", None)
|
||||
if temperature is None:
|
||||
temperature = self.get_default_temperature()
|
||||
@@ -1247,6 +1401,7 @@ When recommending searches, be specific about what information you need and why
|
||||
system_prompt=system_prompt,
|
||||
temperature=temperature,
|
||||
thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
|
||||
images=images if images else None, # Pass images via kwargs
|
||||
)
|
||||
|
||||
logger.info(f"Received response from {provider.get_provider_type().value} API for {self.name}")
|
||||
@@ -1298,6 +1453,7 @@ When recommending searches, be specific about what information you need and why
|
||||
system_prompt=system_prompt,
|
||||
temperature=temperature,
|
||||
thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
|
||||
images=images if images else None, # Pass images via kwargs in retry too
|
||||
)
|
||||
|
||||
if retry_response.content:
|
||||
@@ -1398,6 +1554,7 @@ When recommending searches, be specific about what information you need and why
|
||||
continuation_id = getattr(request, "continuation_id", None)
|
||||
if continuation_id:
|
||||
request_files = getattr(request, "files", []) or []
|
||||
request_images = getattr(request, "images", []) or []
|
||||
# Extract model metadata for conversation tracking
|
||||
model_provider = None
|
||||
model_name = None
|
||||
@@ -1417,6 +1574,7 @@ When recommending searches, be specific about what information you need and why
|
||||
"assistant",
|
||||
formatted_content,
|
||||
files=request_files,
|
||||
images=request_images,
|
||||
tool_name=self.name,
|
||||
model_provider=model_provider,
|
||||
model_name=model_name,
|
||||
@@ -1519,6 +1677,7 @@ When recommending searches, be specific about what information you need and why
|
||||
# Use actually processed files from file preparation instead of original request files
|
||||
# This ensures directories are tracked as their individual expanded files
|
||||
request_files = getattr(self, "_actually_processed_files", []) or getattr(request, "files", []) or []
|
||||
request_images = getattr(request, "images", []) or []
|
||||
# Extract model metadata
|
||||
model_provider = None
|
||||
model_name = None
|
||||
@@ -1538,6 +1697,7 @@ When recommending searches, be specific about what information you need and why
|
||||
"assistant",
|
||||
content,
|
||||
files=request_files,
|
||||
images=request_images,
|
||||
tool_name=self.name,
|
||||
model_provider=model_provider,
|
||||
model_name=model_name,
|
||||
|
||||
Reference in New Issue
Block a user