Vision support via images / pdfs etc that can be passed on to other models as part of analysis, additional context etc.

Image processing pipeline added
OpenAI GPT-4.1 support
Chat tool prompt enhancement
Lint and code quality improvements
This commit is contained in:
Fahad
2025-06-16 13:14:53 +04:00
parent d498e9854b
commit 97fa6781cf
26 changed files with 1328 additions and 52 deletions

View File

@@ -1,5 +1,6 @@
"""Base class for OpenAI-compatible API providers."""
import base64
import ipaddress
import logging
import os
@@ -229,6 +230,7 @@ class OpenAICompatibleProvider(ModelProvider):
system_prompt: Optional[str] = None,
temperature: float = 0.7,
max_output_tokens: Optional[int] = None,
images: Optional[list[str]] = None,
**kwargs,
) -> ModelResponse:
"""Generate content using the OpenAI-compatible API.
@@ -255,7 +257,32 @@ class OpenAICompatibleProvider(ModelProvider):
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
# Prepare user message with text and potentially images
user_content = []
user_content.append({"type": "text", "text": prompt})
# Add images if provided and model supports vision
if images and self._supports_vision(model_name):
for image_path in images:
try:
image_content = self._process_image(image_path)
if image_content:
user_content.append(image_content)
except Exception as e:
logging.warning(f"Failed to process image {image_path}: {e}")
# Continue with other images and text
continue
elif images and not self._supports_vision(model_name):
logging.warning(f"Model {model_name} does not support images, ignoring {len(images)} image(s)")
# Add user message
if len(user_content) == 1:
# Only text content, use simple string format for compatibility
messages.append({"role": "user", "content": prompt})
else:
# Text + images, use content array format
messages.append({"role": "user", "content": user_content})
# Prepare completion parameters
completion_params = {
@@ -424,3 +451,66 @@ class OpenAICompatibleProvider(ModelProvider):
Default is False for OpenAI-compatible providers.
"""
return False
def _supports_vision(self, model_name: str) -> bool:
"""Check if the model supports vision (image processing).
Default implementation for OpenAI-compatible providers.
Subclasses should override with specific model support.
"""
# Common vision-capable models - only include models that actually support images
vision_models = {
"gpt-4o",
"gpt-4o-mini",
"gpt-4-turbo",
"gpt-4-vision-preview",
"gpt-4.1-2025-04-14", # GPT-4.1 supports vision
"o3",
"o3-mini",
"o3-pro",
"o4-mini",
"o4-mini-high",
# Note: Claude models would be handled by a separate provider
}
supports = model_name.lower() in vision_models
logging.debug(f"Model '{model_name}' vision support: {supports}")
return supports
def _process_image(self, image_path: str) -> Optional[dict]:
"""Process an image for OpenAI-compatible API."""
try:
if image_path.startswith("data:image/"):
# Handle data URL: data:image/png;base64,iVBORw0...
return {"type": "image_url", "image_url": {"url": image_path}}
else:
# Handle file path - translate for Docker environment
from utils.file_utils import translate_path_for_environment
translated_path = translate_path_for_environment(image_path)
logging.debug(f"Translated image path from '{image_path}' to '{translated_path}'")
if not os.path.exists(translated_path):
logging.warning(f"Image file not found: {translated_path} (original: {image_path})")
return None
# Use translated path for all subsequent operations
image_path = translated_path
# Detect MIME type from file extension using centralized mappings
from utils.file_types import get_image_mime_type
ext = os.path.splitext(image_path)[1].lower()
mime_type = get_image_mime_type(ext)
logging.debug(f"Processing image '{image_path}' with extension '{ext}' as MIME type '{mime_type}'")
# Read and encode the image
with open(image_path, "rb") as f:
image_data = base64.b64encode(f.read()).decode()
# Create data URL for OpenAI API
data_url = f"data:{mime_type};base64,{image_data}"
return {"type": "image_url", "image_url": {"url": data_url}}
except Exception as e:
logging.error(f"Error processing image {image_path}: {e}")
return None