Improved documentation for conversation/file collection strategy, context budget allocation, etc.

Fahad
2025-06-16 07:17:35 +04:00
parent 5a49d196c8
commit 4c0bd3b86d
5 changed files with 288 additions and 35 deletions

server.py (131 changes)

@@ -364,20 +364,57 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
"""
Handle incoming tool execution requests from MCP clients.
This is the main request dispatcher that routes tool calls to their appropriate handlers.
It supports both AI-powered tools (from TOOLS registry) and utility tools (implemented as
static functions).
CONVERSATION LIFECYCLE MANAGEMENT:
This function serves as the central orchestrator for multi-turn AI-to-AI conversations:
1. THREAD RESUMPTION: When continuation_id is present, it reconstructs complete conversation
context from Redis including conversation history and file references
2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze →
codereview → debug) while preserving full conversation context and file references
3. CONTEXT INJECTION: Reconstructed conversation history is embedded into tool prompts
using the dual prioritization strategy:
- Files: Newest-first prioritization (recent file versions take precedence)
- Turns: Newest-first collection for token efficiency, chronological presentation for LLM
4. FOLLOW-UP GENERATION: After tool execution, generates continuation offers for ongoing
AI-to-AI collaboration with natural language instructions
STATELESS TO STATEFUL BRIDGE:
The MCP protocol is inherently stateless, but this function bridges the gap by:
- Loading persistent conversation state from Redis
- Reconstructing full multi-turn context for tool execution
- Enabling tools to access previous exchanges and file references
- Supporting conversation chains across different tool types
Args:
name: The name of the tool to execute (e.g., "analyze", "chat", "codereview")
arguments: Dictionary of arguments to pass to the tool, potentially including:
- continuation_id: UUID for conversation thread resumption
- files: File paths for analysis (subject to deduplication)
- prompt: User request or follow-up question
- model: Specific AI model to use (optional)
Returns:
List of TextContent objects containing:
- Tool's primary response with analysis/results
- Continuation offers for follow-up conversations (when applicable)
- Structured JSON responses with status and content
Raises:
ValueError: If continuation_id is invalid or conversation thread not found
Exception: For tool-specific errors or execution failures
Example Conversation Flow:
1. Claude calls analyze tool with files → creates new thread
2. Thread ID returned in continuation offer
3. Claude continues with codereview tool + continuation_id → full context preserved
4. Multiple tools can collaborate using same thread ID
"""
logger.info(f"MCP tool call: {name}")
logger.debug(f"MCP tool arguments: {list(arguments.keys())}")
@@ -492,16 +529,82 @@ Remember: Only suggest follow-ups when they would genuinely add value to the dis
async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any]:
"""
Reconstruct conversation context for stateless-to-stateful thread continuation.
This is a critical function that transforms the inherently stateless MCP protocol into
stateful multi-turn conversations. It loads persistent conversation state from Redis
and rebuilds complete conversation context using the sophisticated dual prioritization
strategy implemented in the conversation memory system.
CONTEXT RECONSTRUCTION PROCESS:
1. THREAD RETRIEVAL: Loads complete ThreadContext from Redis using continuation_id
- Includes all conversation turns with tool attribution
- Preserves file references and cross-tool context
- Handles conversation chains across multiple linked threads
2. CONVERSATION HISTORY BUILDING: Uses build_conversation_history() to create
comprehensive context with intelligent prioritization:
FILE PRIORITIZATION (Newest-First Throughout):
- When same file appears in multiple turns, newest reference wins
- File embedding prioritizes recent versions, excludes older duplicates
- Token budget management ensures most relevant files are preserved
CONVERSATION TURN PRIORITIZATION (Dual Strategy):
- Collection Phase: Processes turns newest-to-oldest for token efficiency
- Presentation Phase: Presents turns chronologically for LLM understanding
- Ensures recent context is preserved when token budget is constrained
3. CONTEXT INJECTION: Embeds reconstructed history into tool request arguments
- Conversation history becomes part of the tool's prompt context
- Files referenced in previous turns are accessible to current tool
- Cross-tool knowledge transfer is seamless and comprehensive
4. TOKEN BUDGET MANAGEMENT: Applies model-specific token allocation
- Balances conversation history vs. file content vs. response space
- Gracefully handles token limits with intelligent exclusion strategies
- Preserves most contextually relevant information within constraints
CROSS-TOOL CONTINUATION SUPPORT:
This function enables seamless handoffs between different tools:
- Analyze tool → Debug tool: Full file context and analysis preserved
- Chat tool → CodeReview tool: Conversation context maintained
- Any tool → Any tool: Complete cross-tool knowledge transfer
ERROR HANDLING & RECOVERY:
- Thread expiration: Provides clear instructions for conversation restart
- Redis unavailability: Graceful degradation with error messaging
- Invalid continuation_id: Security validation and user-friendly errors
Args:
arguments: Original request arguments dictionary containing:
- continuation_id (required): UUID of conversation thread to resume
- Other tool-specific arguments that will be preserved
Returns:
dict[str, Any]: Enhanced arguments dictionary with conversation context:
- Original arguments preserved
- Conversation history embedded in appropriate format for tool consumption
- File context from previous turns made accessible
- Cross-tool knowledge transfer enabled
Raises:
ValueError: When continuation_id is invalid, thread not found, or expired
Includes user-friendly recovery instructions
Performance Characteristics:
- O(1) thread lookup in Redis
- O(n) conversation history reconstruction where n = number of turns
- Intelligent token budgeting prevents context window overflow
- Optimized file deduplication minimizes redundant content
Example Usage Flow:
1. Claude: "Continue analyzing the security issues" + continuation_id
2. reconstruct_thread_context() loads previous analyze conversation
3. Debug tool receives full context including previous file analysis
4. Debug tool can reference specific findings from analyze tool
5. Natural cross-tool collaboration without context loss
"""
from utils.conversation_memory import add_turn, build_conversation_history, get_thread
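The reconstruction flow reads roughly as follows. This sketch assumes build_conversation_history returns the formatted history plus the leftover token budget (its real signature, shown later, also takes model_context and more); get_thread and the _remaining_tokens parameter are real names from the source, while the error message wording is illustrative.

async def reconstruct_thread_context_sketch(arguments: dict) -> dict:
    continuation_id = arguments["continuation_id"]
    context = get_thread(continuation_id)  # 1. THREAD RETRIEVAL from Redis
    if context is None:
        # Thread expired or Redis unavailable: fail with recovery instructions
        raise ValueError(
            f"Conversation thread '{continuation_id}' was not found or has expired; "
            "please restart the conversation without a continuation_id."
        )
    # 2. HISTORY BUILDING with dual prioritization inside the token budget
    history, remaining_tokens = build_conversation_history(context)  # return shape assumed
    # 3. CONTEXT INJECTION: embed the history into the prompt the tool will see
    enhanced = dict(arguments)
    enhanced["prompt"] = f"{history}\n\n{arguments.get('prompt', '')}"
    # 4. TOKEN BUDGET MANAGEMENT: pass the leftover budget on to the tool
    enhanced["_remaining_tokens"] = remaining_tokens
    return enhanced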

tools/base.py

@@ -93,6 +93,30 @@ class BaseTool(ABC):
This class defines the interface that all tools must implement and provides
common functionality for request handling, model creation, and response formatting.
CONVERSATION-AWARE FILE PROCESSING:
This base class implements the sophisticated dual prioritization strategy for
conversation-aware file handling across all tools:
1. FILE DEDUPLICATION WITH NEWEST-FIRST PRIORITY:
- When same file appears in multiple conversation turns, newest reference wins
- Prevents redundant file embedding while preserving most recent file state
- Cross-tool file tracking ensures consistent behavior across analyze → codereview → debug
2. CONVERSATION CONTEXT INTEGRATION:
- All tools receive enhanced prompts with conversation history via reconstruct_thread_context()
- File references from previous turns are preserved and accessible
- Cross-tool knowledge transfer maintains full context without manual file re-specification
3. TOKEN-AWARE FILE EMBEDDING:
- Respects model-specific token allocation budgets from ModelContext
- Prioritizes conversation history, then newest files, then remaining content
- Graceful degradation when token limits are approached
4. STATELESS-TO-STATEFUL BRIDGING:
- Tools operate on stateless MCP requests but access full conversation state
- Conversation memory automatically injected via continuation_id parameter
- Enables natural AI-to-AI collaboration across tool boundaries
To create a new tool:
1. Create a new class that inherits from BaseTool
2. Implement all abstract methods
@@ -546,12 +570,33 @@ class BaseTool(ABC):
arguments: Optional[dict] = None,
) -> tuple[str, list[str]]:
"""
Centralized file processing implementing dual prioritization strategy.
DUAL PRIORITIZATION STRATEGY CORE IMPLEMENTATION:
This method is the heart of conversation-aware file processing across all tools:
1. CONVERSATION-AWARE FILE DEDUPLICATION:
- Automatically detects and filters files already embedded in conversation history
- Implements newest-first prioritization: when same file appears in multiple turns,
only the newest reference is preserved to avoid redundant content
- Cross-tool file tracking ensures consistent behavior across tool boundaries
2. TOKEN-BUDGET OPTIMIZATION:
- Respects remaining token budget from conversation context reconstruction
- Prioritizes conversation history + newest file versions within constraints
- Graceful degradation when token limits are approached (newest files preserved first)
- Model-specific token allocation ensures optimal context window utilization
3. CROSS-TOOL CONTINUATION SUPPORT:
- File references persist across different tools (analyze → codereview → debug)
- Previous tool file embeddings are tracked and excluded from new embeddings
- Maintains complete file context without manual re-specification
PROCESSING WORKFLOW:
1. Filter out files already embedded in conversation history using newest-first priority
2. Read content of only new files within remaining token budget
3. Generate informative notes about skipped files for user transparency
4. Return formatted content ready for prompt inclusion
Args:
request_files: List of files requested for current tool execution
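A compact sketch of the newest-first deduplication this method performs. The dict-based turn shape and the function name are simplifications for illustration; the real tracking lives in the conversation memory system.

def collect_files_newest_first(turns: list[dict]) -> list[str]:
    seen: set[str] = set()
    ordered: list[str] = []
    for turn in reversed(turns):               # walk newest turn first
        for path in turn.get("files", []):
            if path not in seen:               # older duplicate references are skipped
                seen.add(path)
                ordered.append(path)
    return ordered

# Example: "a.py" appears in turns 1 and 3; only the turn-3 (newest) reference survives.
turns = [
    {"files": ["a.py", "b.py"]},   # turn 1 (oldest)
    {"files": ["c.py"]},           # turn 2
    {"files": ["a.py"]},           # turn 3 (newest)
]
assert collect_files_newest_first(turns) == ["a.py", "c.py", "b.py"]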

utils/conversation_memory.py

@@ -39,10 +39,12 @@ Key Features:
- Thread-safe operations for concurrent access
- Graceful degradation when Redis is unavailable
DUAL PRIORITIZATION STRATEGY (Files & Conversations):
The conversation memory system implements sophisticated prioritization for both files and
conversation turns, using a consistent "newest-first" approach during collection but
presenting information in the optimal format for LLM consumption:
FILE PRIORITIZATION (Newest-First Throughout):
1. When collecting files across conversation turns, the system walks BACKWARDS through
turns (newest to oldest) and builds a unique file list
2. If the same file path appears in multiple turns, only the reference from the
@@ -54,8 +56,16 @@ that ensures newer file references always take precedence over older ones:
4. This strategy works across conversation chains - files from newer turns in ANY
thread take precedence over files from older turns in ANY thread
This approach ensures that when token limits force file exclusions, the most
recently referenced and contextually relevant files are preserved.
CONVERSATION TURN PRIORITIZATION (Newest-First Collection, Chronological Presentation):
1. COLLECTION PHASE: Processes turns newest-to-oldest to prioritize recent context
- When token budget is tight, OLDER turns are excluded first
- Ensures most contextually relevant recent exchanges are preserved
2. PRESENTATION PHASE: Reverses collected turns to chronological order (oldest-first)
- LLM sees natural conversation flow: "Turn 1 → Turn 2 → Turn 3..."
- Maintains proper sequential understanding while preserving recency prioritization
This dual approach ensures optimal context preservation (newest-first) with natural
conversation flow (chronological) for maximum LLM comprehension and relevance.
USAGE EXAMPLE:
1. Tool A creates thread: create_thread("analyze", request_data) → returns UUID
@@ -64,7 +74,20 @@ USAGE EXAMPLE:
4. Tool B sees conversation history via build_conversation_history()
5. Tool B adds its response: add_turn(UUID, "assistant", response, tool_name="codereview")
DUAL STRATEGY EXAMPLE:
Conversation has 5 turns, token budget allows only 3 turns:
Collection Phase (Newest-First Priority):
- Evaluates: Turn 5 → Turn 4 → Turn 3 → Turn 2 → Turn 1
- Includes: Turn 5, Turn 4, Turn 3 (newest 3 fit in budget)
- Excludes: Turn 2, Turn 1 (oldest, dropped due to token limits)
Presentation Phase (Chronological Order):
- LLM sees: "--- Turn 3 (Claude) ---", "--- Turn 4 (Gemini) ---", "--- Turn 5 (Claude) ---"
- Natural conversation flow maintained despite prioritizing recent context
This enables true AI-to-AI collaboration across the entire tool ecosystem with optimal
context preservation and natural conversation understanding.
"""
import logging
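The dual strategy can be captured in a few lines. This sketch is illustrative only: token costs are reduced to character counts rather than the model-specific estimators the real code uses, and the function name is hypothetical.

def build_history_sketch(turns: list[str], budget: int) -> list[str]:
    collected: list[tuple[int, str]] = []
    spent = 0
    # Phase 1 (collection): walk newest -> oldest so older turns drop out first
    for idx in range(len(turns) - 1, -1, -1):
        cost = len(turns[idx])
        if spent + cost > budget:
            break
        collected.append((idx, turns[idx]))
        spent += cost
    # Phase 2 (presentation): restore chronological order for the LLM
    collected.reverse()
    return [f"--- Turn {idx + 1} ---\n{text}" for idx, text in collected]

# With five equal-sized turns and a budget that fits only three, turns 1-2 are
# dropped and the LLM still reads Turn 3 -> Turn 4 -> Turn 5 in natural order,
# matching the DUAL STRATEGY EXAMPLE above.
turns = [f"turn {n} content" for n in range(1, 6)]
history = build_history_sketch(turns, budget=3 * len(turns[0]))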
@@ -543,10 +566,27 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
to include complete conversation history across multiple linked threads. File
prioritization works across the entire chain, not just the current thread.
CONVERSATION TURN ORDERING STRATEGY:
The function employs a sophisticated two-phase approach for optimal token utilization:
PHASE 1 - COLLECTION (Newest-First for Token Budget):
- Processes conversation turns in REVERSE chronological order (newest to oldest)
- Prioritizes recent turns within token constraints
- If token budget is exceeded, OLDER turns are excluded first
- Ensures the most contextually relevant recent exchanges are preserved
PHASE 2 - PRESENTATION (Chronological for LLM Understanding):
- Reverses the collected turns back to chronological order (oldest to newest)
- Presents conversation flow naturally for LLM comprehension
- Maintains "--- Turn 1, Turn 2, Turn 3..." sequential numbering
- Enables LLM to follow conversation progression logically
This approach balances recency prioritization with natural conversation flow.
TOKEN MANAGEMENT:
- Uses model-specific token allocation (file_tokens + history_tokens)
- Files are embedded ONCE at the start to prevent duplication
- Turn collection prioritizes newest-first, presentation shows chronologically
- Stops adding turns when token budget would be exceeded
- Gracefully handles token limits with informative notes
@@ -770,13 +810,16 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
history_parts.append("Previous conversation turns:")
# === PHASE 1: COLLECTION (Newest-First for Token Budget) ===
# Build conversation turns bottom-up (most recent first) to prioritize recent context within token limits
# This ensures we include as many recent turns as possible within the token budget by excluding
# OLDER turns first when space runs out, preserving the most contextually relevant exchanges
turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering later
total_turn_tokens = 0
file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts)
# CRITICAL: Process turns in REVERSE chronological order (newest to oldest)
# This prioritization strategy ensures recent context is preserved when token budget is tight
for idx in range(len(all_turns) - 1, -1, -1):
turn = all_turns[idx]
turn_num = idx + 1
@@ -821,14 +864,19 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}")
break
# Add this turn to our collection (we'll reverse it later for chronological presentation)
# Store the original index to maintain proper turn numbering in final output
turn_entries.append((idx, turn_content))
total_turn_tokens += turn_tokens
# === PHASE 2: PRESENTATION (Chronological for LLM Understanding) ===
# Reverse the collected turns to restore chronological order (oldest first)
# This gives the LLM a natural conversation flow: Turn 1 → Turn 2 → Turn 3...
# while still having prioritized recent turns during the token-constrained collection phase
turn_entries.reverse()
# Add the turns in chronological order for natural LLM comprehension
# The LLM will see: "--- Turn 1 (Claude) ---" followed by "--- Turn 2 (Gemini) ---" etc.
for _, turn_content in turn_entries:
history_parts.append(turn_content)

utils/file_utils.py

@@ -16,6 +16,25 @@ Security Model:
- All file access is restricted to PROJECT_ROOT and its subdirectories
- Absolute paths are required to prevent ambiguity
- Symbolic links are resolved to ensure they stay within bounds
CONVERSATION MEMORY INTEGRATION:
This module works with the conversation memory system to support efficient
multi-turn file handling:
1. DEDUPLICATION SUPPORT:
- File reading functions are called by conversation-aware tools
- Supports newest-first file prioritization by providing accurate token estimation
- Enables efficient file content caching and token budget management
2. TOKEN BUDGET OPTIMIZATION:
- Provides accurate token estimation for file content before reading
- Supports the dual prioritization strategy by enabling precise budget calculations
- Enables tools to make informed decisions about which files to include
3. CROSS-TOOL FILE PERSISTENCE:
- File reading results are used across different tools in conversation chains
- Consistent file access patterns support conversation continuation scenarios
- Error handling preserves conversation flow when files become unavailable
"""
import json
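A sketch of size-based pre-read estimation consistent with the points above. The ~4 characters-per-token ratio and both function names are assumptions for illustration, not values taken from this module.

import os

def estimate_file_tokens_sketch(path: str, chars_per_token: float = 4.0) -> int:
    try:
        return int(os.path.getsize(path) / chars_per_token)
    except OSError:
        # Mirrors the documented behavior: an unavailable file should not break
        # conversation flow, so report zero cost and let the caller note the skip.
        return 0

def fits_budget(paths: list[str], budget_tokens: int) -> list[str]:
    included, spent = [], 0
    for path in paths:  # assumes paths are already ordered newest-first
        cost = estimate_file_tokens_sketch(path)
        if spent + cost > budget_tokens:
            continue  # skip files that would overflow the budget
        included.append(path)
        spent += cost
    return included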

utils/model_context.py

@@ -4,6 +4,26 @@ Model context management for dynamic token allocation.
This module provides a clean abstraction for model-specific token management,
ensuring that token limits are properly calculated based on the current model
being used, not global constants.
CONVERSATION MEMORY INTEGRATION:
This module works closely with the conversation memory system to provide
optimal token allocation for multi-turn conversations:
1. DUAL PRIORITIZATION STRATEGY SUPPORT:
- Provides separate token budgets for conversation history vs. files
- Enables the conversation memory system to apply newest-first prioritization
- Ensures optimal balance between context preservation and new content
2. MODEL-SPECIFIC ALLOCATION:
- Dynamic allocation based on model capabilities (context window size)
- Conservative allocation for smaller models (O3: 200K context)
- Generous allocation for larger models (Gemini: 1M+ context)
- Adapts token distribution ratios based on model capacity
3. CROSS-TOOL CONSISTENCY:
- Provides consistent token budgets across different tools
- Enables seamless conversation continuation between tools
- Supports conversation reconstruction with proper budget management
"""
import logging
@@ -64,13 +84,31 @@ class ModelContext:
def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
"""
Calculate token allocation based on model capacity and conversation requirements.
This method implements the core token budget calculation that supports the
dual prioritization strategy used in conversation memory and file processing:
TOKEN ALLOCATION STRATEGY:
1. CONTENT vs RESPONSE SPLIT:
- Smaller models (< 300K): 60% content, 40% response (conservative)
- Larger models (≥ 300K): 80% content, 20% response (generous)
2. CONTENT SUB-ALLOCATION:
- File tokens: 30-40% of content budget for newest file versions
- History tokens: 40-50% of content budget for conversation context
- Remaining: Available for tool-specific prompt content
3. CONVERSATION MEMORY INTEGRATION:
- History allocation enables conversation reconstruction in reconstruct_thread_context()
- File allocation supports newest-first file prioritization in tools
- Remaining budget passed to tools via _remaining_tokens parameter
Args:
reserved_for_response: Override response token reservation
Returns:
TokenAllocation with calculated budgets for dual prioritization strategy
"""
total_tokens = self.capabilities.context_window
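Translating those rules into numbers, a sketch: the 60/40 vs. 80/20 split and the 300K threshold come from the docstring, while the 35% file and 45% history midpoints (within the stated 30-40% and 40-50% ranges) and the standalone function form are illustrative choices.

def calculate_token_allocation_sketch(context_window: int) -> dict[str, int]:
    if context_window < 300_000:
        content = int(context_window * 0.6)   # conservative: 60% content, 40% response
    else:
        content = int(context_window * 0.8)   # generous: 80% content, 20% response
    file_tokens = int(content * 0.35)         # 30-40% of content for newest file versions
    history_tokens = int(content * 0.45)      # 40-50% of content for conversation history
    return {
        "content": content,
        "response": context_window - content,
        "file_tokens": file_tokens,
        "history_tokens": history_tokens,
        "remaining": content - file_tokens - history_tokens,  # tool-specific prompt budget
    }

# O3 at 200K context -> 120K content / 80K response; files ~42K, history ~54K.
allocation = calculate_token_allocation_sketch(200_000)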