Vision support via images, PDFs, etc., which can be passed on to other models as part of analysis or as additional context.
Image processing pipeline added; OpenAI GPT-4.1 support; chat tool prompt enhancement; lint and code quality improvements.
This commit is contained in:
@@ -142,6 +142,7 @@ class ConversationTurn(BaseModel):
|
||||
content: The actual message content/response
|
||||
timestamp: ISO timestamp when this turn was created
|
||||
files: List of file paths referenced in this specific turn
|
||||
images: List of image paths referenced in this specific turn
|
||||
tool_name: Which tool generated this turn (for cross-tool tracking)
|
||||
model_provider: Provider used (e.g., "google", "openai")
|
||||
model_name: Specific model used (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
|
||||
@@ -152,6 +153,7 @@ class ConversationTurn(BaseModel):
|
||||
content: str
|
||||
timestamp: str
|
||||
files: Optional[list[str]] = None # Files referenced in this turn
|
||||
images: Optional[list[str]] = None # Images referenced in this turn
|
||||
tool_name: Optional[str] = None # Tool used for this turn
|
||||
model_provider: Optional[str] = None # Model provider (google, openai, etc)
|
||||
model_name: Optional[str] = None # Specific model used
|
||||
@@ -300,6 +302,7 @@ def add_turn(
|
||||
role: str,
|
||||
content: str,
|
||||
files: Optional[list[str]] = None,
|
||||
images: Optional[list[str]] = None,
|
||||
tool_name: Optional[str] = None,
|
||||
model_provider: Optional[str] = None,
|
||||
model_name: Optional[str] = None,
|
||||
@@ -318,6 +321,7 @@ def add_turn(
|
||||
role: "user" (Claude) or "assistant" (Gemini/O3/etc)
|
||||
content: The actual message/response content
|
||||
files: Optional list of files referenced in this turn
|
||||
images: Optional list of images referenced in this turn
|
||||
tool_name: Name of the tool adding this turn (for attribution)
|
||||
model_provider: Provider used (e.g., "google", "openai")
|
||||
model_name: Specific model used (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
|
||||
@@ -335,6 +339,7 @@ def add_turn(
|
||||
- Refreshes thread TTL to configured timeout on successful update
|
||||
- Turn limits prevent runaway conversations
|
||||
- File references are preserved for cross-tool access with atomic ordering
|
||||
- Image references are preserved for cross-tool visual context
|
||||
- Model information enables cross-provider conversations
|
||||
"""
|
||||
logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
|
||||
@@ -355,6 +360,7 @@ def add_turn(
|
||||
content=content,
|
||||
timestamp=datetime.now(timezone.utc).isoformat(),
|
||||
files=files, # Preserved for cross-tool file context
|
||||
images=images, # Preserved for cross-tool visual context
|
||||
tool_name=tool_name, # Track which tool generated this turn
|
||||
model_provider=model_provider, # Track model provider
|
||||
model_name=model_name, # Track specific model
|
||||
@@ -489,6 +495,78 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
|
||||
return file_list
|
||||
|
||||
|
||||
def get_conversation_image_list(context: ThreadContext) -> list[str]:
    """
    Collect every distinct image path referenced in a conversation, newest turn first.

    Turns are visited from the last one back to the first. Within a turn, images are
    taken in the order they are listed in ``turn.images``. An image path is emitted the
    first time it is encountered during this backwards walk, so its position in the
    result is determined by the NEWEST turn that references it; any reference in an
    older turn is ignored.

    Example:
        Turn 1: images = ["diagram.png", "flow.jpg"]
        Turn 2: images = ["error.png"]
        Turn 3: images = ["diagram.png", "updated.png"]

        Result: ["diagram.png", "updated.png", "error.png", "flow.jpg"]
        (diagram.png is placed according to Turn 3; its Turn 1 reference is skipped)

    Args:
        context: ThreadContext whose ``turns`` are scanned for ``images`` entries

    Returns:
        list[str]: Unique image paths, ordered by most recent reference first.
                   Empty list when there are no turns or no turn references an image.

    Performance:
        - Every image reference is visited once
        - Duplicate detection uses a set lookup per reference
    """
    turns = context.turns
    if not turns:
        logger.debug("[IMAGES] No turns found, returning empty image list")
        return []

    already_collected = set()
    ordered_images = []

    logger.debug(f"[IMAGES] Collecting images from {len(turns)} turns (newest first)")

    # Pair each turn with its 1-based position while walking newest -> oldest.
    # Anything already collected came from a newer turn, so older repeats are dropped.
    turn_numbers = range(len(turns), 0, -1)
    for turn_number, turn in zip(turn_numbers, reversed(turns)):
        if not turn.images:
            continue

        logger.debug(f"[IMAGES] Turn {turn_number} has {len(turn.images)} images: {turn.images}")
        for image_path in turn.images:
            if image_path in already_collected:
                # An equal path was contributed by a newer turn - keep that placement
                logger.debug(f"[IMAGES] Skipping duplicate image: {image_path} (newer version already included)")
                continue

            # First encounter during the backwards walk, i.e. the newest reference
            already_collected.add(image_path)
            ordered_images.append(image_path)
            logger.debug(f"[IMAGES] Added new image: {image_path} (from turn {turn_number})")

    logger.debug(f"[IMAGES] Final image list ({len(ordered_images)}): {ordered_images}")
    return ordered_images
|
||||
|
||||
|
||||
def _plan_file_inclusion_by_size(all_files: list[str], max_file_tokens: int) -> tuple[list[str], list[str], int]:
|
||||
"""
|
||||
Plan which files to include based on size constraints.
|
||||
|
||||
@@ -88,8 +88,9 @@ TEXT_DATA = {
|
||||
".lock", # Lock files
|
||||
}
|
||||
|
||||
# Image file extensions
|
||||
IMAGES = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".webp", ".ico", ".tiff", ".tif"}
|
||||
# Image file extensions - limited to what AI models actually support
|
||||
# Based on OpenAI and Gemini supported formats: PNG, JPEG, GIF, WebP
|
||||
IMAGES = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
|
||||
|
||||
# Binary executable and library extensions
|
||||
BINARIES = {
|
||||
@@ -240,3 +241,30 @@ def get_token_estimation_ratio(file_path: str) -> float:
|
||||
|
||||
extension = Path(file_path).suffix.lower()
|
||||
return TOKEN_ESTIMATION_RATIOS.get(extension, 3.5) # Conservative default
|
||||
|
||||
|
||||
# Extension -> MIME type table for image files.
# Restricted to the formats the AI models actually support
# (per OpenAI and Gemini supported formats: PNG, JPEG, GIF, WebP).
IMAGE_MIME_TYPES = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def get_image_mime_type(extension: str) -> str:
    """
    Look up the MIME type for an image file extension.

    The extension is matched case-insensitively and may be supplied with or
    without its leading dot, so "png", ".png" and ".PNG" all give the same result.

    Args:
        extension: File extension (with or without leading dot)

    Returns:
        The matching entry of IMAGE_MIME_TYPES, or "image/jpeg" when the
        extension is not present in that table
    """
    dotted = extension if extension.startswith(".") else "." + extension
    return IMAGE_MIME_TYPES.get(dotted.lower(), "image/jpeg")
|
||||
|
||||
@@ -48,6 +48,36 @@ from .file_types import BINARY_EXTENSIONS, CODE_EXTENSIONS, IMAGE_EXTENSIONS, TE
|
||||
from .security_config import CONTAINER_WORKSPACE, EXCLUDED_DIRS, MCP_SIGNATURE_FILES, SECURITY_ROOT, WORKSPACE_ROOT
|
||||
from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens
|
||||
|
||||
|
||||
def _is_builtin_custom_models_config(path_str: str) -> bool:
    """
    Report whether a path refers to the server's bundled conf/custom_models.json.

    The bundled file is located relative to this module: two directory levels up,
    then ``conf/custom_models.json``. The candidate and the bundled location are
    both resolved before being compared, so relative and absolute spellings of the
    same file are treated as equal. The match is purely by resolved location.

    Args:
        path_str: Path to check

    Returns:
        True if the resolved candidate equals the resolved bundled config path;
        False otherwise, including when building or resolving either path raises
    """
    try:
        # This module sits one directory below the server root
        # (utils/file_utils.py -> server_root), hence the two .parent hops.
        bundled_config = Path(__file__).parent.parent / "conf" / "custom_models.json"
        return Path(path_str).resolve() == bundled_config.resolve()
    except Exception:
        # Best effort: a path that cannot be resolved is not the bundled config
        return False
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -271,7 +301,8 @@ def translate_path_for_environment(path_str: str) -> str:
|
||||
tools and utilities throughout the codebase. It handles:
|
||||
1. Docker host-to-container path translation (host paths -> /workspace/...)
|
||||
2. Direct mode (no translation needed)
|
||||
3. Security validation and error handling
|
||||
3. Internal server files (conf/custom_models.json)
|
||||
4. Security validation and error handling
|
||||
|
||||
Docker Path Translation Logic:
|
||||
- Input: /Users/john/project/src/file.py (host path from Claude)
|
||||
@@ -284,6 +315,10 @@ def translate_path_for_environment(path_str: str) -> str:
|
||||
Returns:
|
||||
Translated path appropriate for the current environment
|
||||
"""
|
||||
# Handle built-in server config file - no translation needed
|
||||
if _is_builtin_custom_models_config(path_str):
|
||||
return path_str
|
||||
|
||||
if not WORKSPACE_ROOT or not WORKSPACE_ROOT.strip() or not CONTAINER_WORKSPACE.exists():
|
||||
# Not in the configured Docker environment, no translation needed
|
||||
return path_str
|
||||
|
||||
Reference in New Issue
Block a user