Vision support via images / pdfs etc that can be passed on to other models as part of analysis, additional context etc.

Image processing pipeline added OpenAI GPT-4.1 support Chat tool prompt enhancement Lint and code quality improvements
2025-06-16 13:14:53 +04:00
parent d498e9854b
commit 97fa6781cf
26 changed files with 1328 additions and 52 deletions
--- a/utils/conversation_memory.py
+++ b/utils/conversation_memory.py
@@ -142,6 +142,7 @@ class ConversationTurn(BaseModel):
        content: The actual message content/response
        timestamp: ISO timestamp when this turn was created
        files: List of file paths referenced in this specific turn
+        images: List of image paths referenced in this specific turn
        tool_name: Which tool generated this turn (for cross-tool tracking)
        model_provider: Provider used (e.g., "google", "openai")
        model_name: Specific model used (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
@@ -152,6 +153,7 @@ class ConversationTurn(BaseModel):
    content: str
    timestamp: str
    files: Optional[list[str]] = None  # Files referenced in this turn
+    images: Optional[list[str]] = None  # Images referenced in this turn
    tool_name: Optional[str] = None  # Tool used for this turn
    model_provider: Optional[str] = None  # Model provider (google, openai, etc)
    model_name: Optional[str] = None  # Specific model used
@@ -300,6 +302,7 @@ def add_turn(
    role: str,
    content: str,
    files: Optional[list[str]] = None,
+    images: Optional[list[str]] = None,
    tool_name: Optional[str] = None,
    model_provider: Optional[str] = None,
    model_name: Optional[str] = None,
@@ -318,6 +321,7 @@ def add_turn(
        role: "user" (Claude) or "assistant" (Gemini/O3/etc)
        content: The actual message/response content
        files: Optional list of files referenced in this turn
+        images: Optional list of images referenced in this turn
        tool_name: Name of the tool adding this turn (for attribution)
        model_provider: Provider used (e.g., "google", "openai")
        model_name: Specific model used (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
@@ -335,6 +339,7 @@ def add_turn(
        - Refreshes thread TTL to configured timeout on successful update
        - Turn limits prevent runaway conversations
        - File references are preserved for cross-tool access with atomic ordering
+        - Image references are preserved for cross-tool visual context
        - Model information enables cross-provider conversations
    """
    logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
@@ -355,6 +360,7 @@ def add_turn(
        content=content,
        timestamp=datetime.now(timezone.utc).isoformat(),
        files=files,  # Preserved for cross-tool file context
+        images=images,  # Preserved for cross-tool visual context
        tool_name=tool_name,  # Track which tool generated this turn
        model_provider=model_provider,  # Track model provider
        model_name=model_name,  # Track specific model
@@ -489,6 +495,78 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
    return file_list


+def get_conversation_image_list(context: ThreadContext) -> list[str]:
+    """
+    Extract all unique images from conversation turns with newest-first prioritization.
+
+    This function implements the identical prioritization logic as get_conversation_file_list()
+    to ensure consistency in how images are handled across conversation turns. It walks
+    backwards through conversation turns (from newest to oldest) and collects unique image
+    references, ensuring that when the same image appears in multiple turns, the reference
+    from the NEWEST turn takes precedence.
+
+    PRIORITIZATION ALGORITHM:
+    1. Iterate through turns in REVERSE order (index len-1 down to 0)
+    2. For each turn, process images in the order they appear in turn.images
+    3. Add image to result list only if not already seen (newest reference wins)
+    4. Skip duplicate images that were already added from newer turns
+
+    This ensures that:
+    - Images from newer conversation turns appear first in the result
+    - When the same image is referenced multiple times, only the newest reference is kept
+    - The order reflects the most recent conversation context
+
+    Example:
+        Turn 1: images = ["diagram.png", "flow.jpg"]
+        Turn 2: images = ["error.png"]
+        Turn 3: images = ["diagram.png", "updated.png"]  # diagram.png appears again
+
+        Result: ["diagram.png", "updated.png", "error.png", "flow.jpg"]
+        (diagram.png from Turn 3 takes precedence over Turn 1)
+
+    Args:
+        context: ThreadContext containing all conversation turns to process
+
+    Returns:
+        list[str]: Unique image paths ordered by newest reference first.
+                   Empty list if no turns exist or no images are referenced.
+
+    Performance:
+        - Time Complexity: O(n*m) where n=turns, m=avg images per turn
+        - Space Complexity: O(i) where i=total unique images
+        - Uses set for O(1) duplicate detection
+    """
+    if not context.turns:
+        logger.debug("[IMAGES] No turns found, returning empty image list")
+        return []
+
+    # Collect images by walking backwards (newest to oldest turns)
+    seen_images = set()
+    image_list = []
+
+    logger.debug(f"[IMAGES] Collecting images from {len(context.turns)} turns (newest first)")
+
+    # Process turns in reverse order (newest first) - this is the CORE of newest-first prioritization
+    # By iterating from len-1 down to 0, we encounter newer turns before older turns
+    # When we find a duplicate image, we skip it because the newer version is already in our list
+    for i in range(len(context.turns) - 1, -1, -1):  # REVERSE: newest turn first
+        turn = context.turns[i]
+        if turn.images:
+            logger.debug(f"[IMAGES] Turn {i + 1} has {len(turn.images)} images: {turn.images}")
+            for image_path in turn.images:
+                if image_path not in seen_images:
+                    # First time seeing this image - add it (this is the NEWEST reference)
+                    seen_images.add(image_path)
+                    image_list.append(image_path)
+                    logger.debug(f"[IMAGES] Added new image: {image_path} (from turn {i + 1})")
+                else:
+                    # Image already seen from a NEWER turn - skip this older reference
+                    logger.debug(f"[IMAGES] Skipping duplicate image: {image_path} (newer version already included)")
+
+    logger.debug(f"[IMAGES] Final image list ({len(image_list)}): {image_list}")
+    return image_list
+
+
 def _plan_file_inclusion_by_size(all_files: list[str], max_file_tokens: int) -> tuple[list[str], list[str], int]:
    """
    Plan which files to include based on size constraints.