From 4c0bd3b86d16ede05007503273d039e37d63b5ed Mon Sep 17 00:00:00 2001
From: Fahad
Date: Mon, 16 Jun 2025 07:17:35 +0400
Subject: [PATCH] Improved documentation for conversation / file collection
 strategy, context budget allocation etc

---
 server.py                    | 131 +++++++++++++++++++++++++++++++----
 tools/base.py                |  55 +++++++++++++--
 utils/conversation_memory.py |  76 ++++++++++++++++----
 utils/file_utils.py          |  19 +++++
 utils/model_context.py       |  42 ++++++++++-
 5 files changed, 288 insertions(+), 35 deletions(-)

diff --git a/server.py b/server.py
index 29003a4..a9b1f71 100644
--- a/server.py
+++ b/server.py
@@ -364,20 +364,57 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
     """
     Handle incoming tool execution requests from MCP clients.
 
-    This is the main request dispatcher that routes tool calls to their
-    appropriate handlers. It supports both AI-powered tools (from TOOLS registry)
-    and utility tools (implemented as static functions).
+    This is the main request dispatcher that routes tool calls to their appropriate handlers.
+    It supports both AI-powered tools (from the TOOLS registry) and utility tools (implemented
+    as static functions).
 
-    Thread Context Reconstruction:
-    If the request contains a continuation_id, this function reconstructs
-    the conversation history and injects it into the tool's context.
+    CONVERSATION LIFECYCLE MANAGEMENT:
+    This function serves as the central orchestrator for multi-turn AI-to-AI conversations:
+
+    1. THREAD RESUMPTION: When a continuation_id is present, reconstructs the complete
+       conversation context from Redis, including conversation history and file references
+
+    2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze →
+       codereview → debug) while preserving full conversation context and file references
+
+    3. CONTEXT INJECTION: Reconstructed conversation history is embedded into tool prompts
+       using the dual prioritization strategy:
+       - Files: newest-first prioritization (recent file versions take precedence)
+       - Turns: newest-first collection for token efficiency, chronological presentation
+         for LLM comprehension
+
+    4. FOLLOW-UP GENERATION: After tool execution, generates continuation offers for
+       ongoing AI-to-AI collaboration, with natural language instructions
+
+    STATELESS-TO-STATEFUL BRIDGE:
+    The MCP protocol is inherently stateless, but this function bridges the gap by:
+    - Loading persistent conversation state from Redis
+    - Reconstructing full multi-turn context for tool execution
+    - Enabling tools to access previous exchanges and file references
+    - Supporting conversation chains across different tool types
 
     Args:
-        name: The name of the tool to execute
-        arguments: Dictionary of arguments to pass to the tool
+        name: The name of the tool to execute (e.g., "analyze", "chat", "codereview")
+        arguments: Dictionary of arguments to pass to the tool, potentially including:
+            - continuation_id: UUID for conversation thread resumption
+            - files: File paths for analysis (subject to deduplication)
+            - prompt: User request or follow-up question
+            - model: Specific AI model to use (optional)
 
     Returns:
-        List of TextContent objects containing the tool's response
+        List of TextContent objects containing:
+        - The tool's primary response with analysis/results
+        - Continuation offers for follow-up conversations (when applicable)
+        - Structured JSON responses with status and content
+
+    Raises:
+        ValueError: If continuation_id is invalid or the conversation thread is not found
+        Exception: For tool-specific errors or execution failures
+
+    Example Conversation Flow:
+        1. Claude calls the analyze tool with files → a new thread is created
+        2. The thread ID is returned in the continuation offer
+        3. Claude continues with the codereview tool + continuation_id → full context preserved
+        4. Multiple tools can collaborate using the same thread ID
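+
+    Illustrative continuation call (a sketch; exact argument shapes vary by tool):
+
+        # First call: no continuation_id, so a new thread is created
+        arguments = {"prompt": "Analyze this module", "files": ["/abs/path/main.py"]}
+
+        # Follow-up call: resumes the same thread, possibly from a different tool
+        arguments = {
+            "prompt": "Now review the error handling",
+            "continuation_id": "123e4567-e89b-12d3-a456-426614174000",
+        }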
     """
     logger.info(f"MCP tool call: {name}")
     logger.debug(f"MCP tool arguments: {list(arguments.keys())}")
@@ -492,16 +529,82 @@ Remember: Only suggest follow-ups when they would genuinely add value to the dis
 
 async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any]:
     """
-    Reconstruct conversation context for thread continuation.
+    Reconstruct conversation context for stateless-to-stateful thread continuation.
 
-    This function loads the conversation history from Redis and integrates it
-    into the request arguments to provide full context to the tool.
+    This function transforms the inherently stateless MCP protocol into stateful
+    multi-turn conversations. It loads persistent conversation state from Redis and
+    rebuilds complete conversation context using the dual prioritization strategy
+    implemented in the conversation memory system.
+
+    CONTEXT RECONSTRUCTION PROCESS:
+
+    1. THREAD RETRIEVAL: Loads the complete ThreadContext from Redis using continuation_id
+       - Includes all conversation turns with tool attribution
+       - Preserves file references and cross-tool context
+       - Handles conversation chains across multiple linked threads
+
+    2. CONVERSATION HISTORY BUILDING: Uses build_conversation_history() to create
+       comprehensive context with intelligent prioritization:
+
+       FILE PRIORITIZATION (Newest-First Throughout):
+       - When the same file appears in multiple turns, the newest reference wins
+       - File embedding prioritizes recent versions and excludes older duplicates
+       - Token budget management ensures the most relevant files are preserved
+
+       CONVERSATION TURN PRIORITIZATION (Dual Strategy):
+       - Collection phase: processes turns newest-to-oldest for token efficiency
+       - Presentation phase: presents turns chronologically for LLM understanding
+       - Ensures recent context is preserved when the token budget is constrained
+
+    3. CONTEXT INJECTION: Embeds the reconstructed history into the tool request arguments
+       - Conversation history becomes part of the tool's prompt context
+       - Files referenced in previous turns are accessible to the current tool
+       - Cross-tool knowledge transfer is seamless and comprehensive
+
+    4. TOKEN BUDGET MANAGEMENT: Applies model-specific token allocation
+       - Balances conversation history vs. file content vs. response space
+       - Gracefully handles token limits with intelligent exclusion strategies
+       - Preserves the most contextually relevant information within constraints
+
+    CROSS-TOOL CONTINUATION SUPPORT:
+    This function enables seamless handoffs between different tools:
+    - Analyze tool → Debug tool: full file context and analysis preserved
+    - Chat tool → CodeReview tool: conversation context maintained
+    - Any tool → Any tool: complete cross-tool knowledge transfer
+
+    ERROR HANDLING & RECOVERY:
+    - Thread expiration: provides clear instructions for restarting the conversation
+    - Redis unavailability: graceful degradation with error messaging
+    - Invalid continuation_id: security validation and user-friendly errors
 
     Args:
-        arguments: Original request arguments containing continuation_id
+        arguments: Original request arguments dictionary containing:
+            - continuation_id (required): UUID of the conversation thread to resume
+            - Other tool-specific arguments, which are preserved as-is
 
     Returns:
-        Modified arguments with conversation history injected
+        dict[str, Any]: Enhanced arguments dictionary with conversation context:
+        - Original arguments preserved
+        - Conversation history embedded in the appropriate format for tool consumption
+        - File context from previous turns made accessible
+        - Cross-tool knowledge transfer enabled
+
+    Raises:
+        ValueError: When continuation_id is invalid, or the thread is not found or has
+            expired. Includes user-friendly recovery instructions.
+
+    Performance Characteristics:
+    - O(1) thread lookup in Redis
+    - O(n) conversation history reconstruction, where n = number of turns
+    - Intelligent token budgeting prevents context window overflow
+    - Optimized file deduplication minimizes redundant content
+
+    Example Usage Flow:
+    1. Claude: "Continue analyzing the security issues" + continuation_id
+    2. reconstruct_thread_context() loads the previous analyze conversation
+    3. The debug tool receives full context, including the previous file analysis
+    4. The debug tool can reference specific findings from the analyze tool
+    5. Natural cross-tool collaboration without context loss
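+
+    Illustrative sketch of the flow (pseudocode only; the actual signatures and
+    return shapes live in utils.conversation_memory and may differ):
+
+        context = get_thread(arguments["continuation_id"])  # ThreadContext from Redis
+        history = build_conversation_history(context, model_context)  # dual-prioritized
+        arguments["prompt"] = f"{history}\n\n{arguments['prompt']}"  # inject context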
     """
     from utils.conversation_memory import add_turn, build_conversation_history, get_thread
 
diff --git a/tools/base.py b/tools/base.py
index 47bdf04..d72b7f5 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -93,6 +93,30 @@ class BaseTool(ABC):
     This class defines the interface that all tools must implement and provides
     common functionality for request handling, model creation, and response formatting.
 
+    CONVERSATION-AWARE FILE PROCESSING:
+    This base class implements the dual prioritization strategy for conversation-aware
+    file handling across all tools:
+
+    1. FILE DEDUPLICATION WITH NEWEST-FIRST PRIORITY:
+       - When the same file appears in multiple conversation turns, the newest reference wins
+       - Prevents redundant file embedding while preserving the most recent file state
+       - Cross-tool file tracking ensures consistent behavior across analyze → codereview → debug
+
+    2. CONVERSATION CONTEXT INTEGRATION:
+       - All tools receive enhanced prompts with conversation history via reconstruct_thread_context()
+       - File references from previous turns are preserved and accessible
+       - Cross-tool knowledge transfer maintains full context without manual file re-specification
+
+    3. TOKEN-AWARE FILE EMBEDDING:
+       - Respects model-specific token allocation budgets from ModelContext
+       - Prioritizes conversation history, then the newest files, then remaining content
+       - Degrades gracefully as token limits are approached
+
+    4. STATELESS-TO-STATEFUL BRIDGING:
+       - Tools operate on stateless MCP requests but access full conversation state
+       - Conversation memory is automatically injected via the continuation_id parameter
+       - Enables natural AI-to-AI collaboration across tool boundaries
+
     To create a new tool:
     1. Create a new class that inherits from BaseTool
     2. Implement all abstract methods
@@ -546,12 +570,33 @@ class BaseTool(ABC):
         arguments: Optional[dict] = None,
     ) -> tuple[str, list[str]]:
         """
-        Centralized file processing for tool prompts.
+        Centralized file processing implementing the dual prioritization strategy.
 
-        This method handles the common pattern across all tools:
-        1. Filter out files already embedded in conversation history
-        2. Read content of only new files
-        3. Generate informative note about skipped files
+        DUAL PRIORITIZATION STRATEGY CORE IMPLEMENTATION:
+        This method is the heart of conversation-aware file processing across all tools:
+
+        1. CONVERSATION-AWARE FILE DEDUPLICATION:
+           - Automatically detects and filters files already embedded in conversation history
+           - Implements newest-first prioritization: when the same file appears in multiple
+             turns, only the newest reference is preserved, avoiding redundant content
+           - Cross-tool file tracking ensures consistent behavior across tool boundaries
+
+        2. TOKEN-BUDGET OPTIMIZATION:
+           - Respects the token budget remaining after conversation context reconstruction
+           - Prioritizes conversation history plus the newest file versions within constraints
+           - Degrades gracefully as token limits are approached (newest files preserved first)
+           - Model-specific token allocation ensures optimal context window utilization
+
+        3. CROSS-TOOL CONTINUATION SUPPORT:
+           - File references persist across different tools (analyze → codereview → debug)
+           - File embeddings from previous tools are tracked and excluded from new embeddings
+           - Maintains complete file context without manual re-specification
+
+        PROCESSING WORKFLOW (see the sketch below):
+        1. Filter out files already embedded in conversation history, using newest-first priority
+        2. Read the content of only the new files, within the remaining token budget
+        3. Generate informative notes about skipped files for user transparency
+        4. Return formatted content ready for prompt inclusion
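+
+        Illustrative sketch of the newest-first deduplication (simplified; the
+        variable names here are hypothetical, not the actual helpers):
+
+            already_embedded = set()
+            for turn in reversed(conversation_turns):   # newest turn first
+                already_embedded.update(turn.files)     # newest reference wins
+            new_files = [f for f in request_files if f not in already_embedded]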
 
         Args:
             request_files: List of files requested for current tool execution
diff --git a/utils/conversation_memory.py b/utils/conversation_memory.py
index 6e7c88e..48b38cf 100644
--- a/utils/conversation_memory.py
+++ b/utils/conversation_memory.py
@@ -39,10 +39,12 @@ Key Features:
 - Thread-safe operations for concurrent access
 - Graceful degradation when Redis is unavailable
 
-FILE PRIORITIZATION STRATEGY:
-The conversation memory system implements a sophisticated file prioritization algorithm
-that ensures newer file references always take precedence over older ones:
+DUAL PRIORITIZATION STRATEGY (Files & Conversations):
+The conversation memory system prioritizes both files and conversation turns, using a
+consistent "newest-first" approach during collection while presenting information in
+the optimal format for LLM consumption:
 
+FILE PRIORITIZATION (Newest-First Throughout):
 1. When collecting files across conversation turns, the system walks BACKWARDS
    through turns (newest to oldest) and builds a unique file list
 2. If the same file path appears in multiple turns, only the reference from the
@@ -54,8 +56,16 @@ that ensures newer file references always take precedence over older ones:
 4. This strategy works across conversation chains - files from newer turns in ANY
    thread take precedence over files from older turns in ANY thread
 
-This approach ensures that when token limits force file exclusions, the most
-recently referenced and contextually relevant files are preserved.
+CONVERSATION TURN PRIORITIZATION (Newest-First Collection, Chronological Presentation):
+1. COLLECTION PHASE: Processes turns newest-to-oldest to prioritize recent context
+   - When the token budget is tight, OLDER turns are excluded first
+   - Ensures the most contextually relevant recent exchanges are preserved
+2. PRESENTATION PHASE: Reverses the collected turns to chronological order (oldest-first)
+   - The LLM sees the natural conversation flow: "Turn 1 → Turn 2 → Turn 3..."
+   - Maintains proper sequential understanding while preserving recency prioritization
+
+This dual approach ensures optimal context preservation (newest-first) with natural
+conversation flow (chronological) for maximum LLM comprehension and relevance.
 
 USAGE EXAMPLE:
 1. Tool A creates thread: create_thread("analyze", request_data) → returns UUID
@@ -64,7 +74,20 @@ USAGE EXAMPLE:
 4. Tool B sees conversation history via build_conversation_history()
 5. Tool B adds its response: add_turn(UUID, "assistant", response, tool_name="codereview")
 
-This enables true AI-to-AI collaboration across the entire tool ecosystem.
+DUAL STRATEGY EXAMPLE:
+A conversation has 5 turns, but the token budget allows only 3:
+
+Collection Phase (Newest-First Priority):
+- Evaluates: Turn 5 → Turn 4 → Turn 3 → Turn 2 → Turn 1
+- Includes: Turn 5, Turn 4, Turn 3 (the newest 3 fit within the budget)
+- Excludes: Turn 2, Turn 1 (the oldest, dropped due to token limits)
+
+Presentation Phase (Chronological Order):
+- The LLM sees: "--- Turn 3 (Claude) ---", "--- Turn 4 (Gemini) ---", "--- Turn 5 (Claude) ---"
+- Natural conversation flow is maintained despite prioritizing recent context
+
+This enables true AI-to-AI collaboration across the entire tool ecosystem, with optimal
+context preservation and natural conversation understanding.
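+
+Illustrative sketch of the collect-then-reverse pattern (simplified pseudocode; the
+real implementation also tracks token budgets and turn attribution):
+
+    collected = []
+    for turn in reversed(turns):        # newest first
+        if not fits_in_budget(turn):    # fits_in_budget is hypothetical
+            break                       # older turns are dropped first
+        collected.append(turn)
+    collected.reverse()                 # present chronologically (oldest first)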
""" import logging @@ -543,10 +566,27 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_ to include complete conversation history across multiple linked threads. File prioritization works across the entire chain, not just the current thread. + CONVERSATION TURN ORDERING STRATEGY: + The function employs a sophisticated two-phase approach for optimal token utilization: + + PHASE 1 - COLLECTION (Newest-First for Token Budget): + - Processes conversation turns in REVERSE chronological order (newest to oldest) + - Prioritizes recent turns within token constraints + - If token budget is exceeded, OLDER turns are excluded first + - Ensures the most contextually relevant recent exchanges are preserved + + PHASE 2 - PRESENTATION (Chronological for LLM Understanding): + - Reverses the collected turns back to chronological order (oldest to newest) + - Presents conversation flow naturally for LLM comprehension + - Maintains "--- Turn 1, Turn 2, Turn 3..." sequential numbering + - Enables LLM to follow conversation progression logically + + This approach balances recency prioritization with natural conversation flow. + TOKEN MANAGEMENT: - Uses model-specific token allocation (file_tokens + history_tokens) - Files are embedded ONCE at the start to prevent duplication - - Conversation turns are processed newest-first but presented chronologically + - Turn collection prioritizes newest-first, presentation shows chronologically - Stops adding turns when token budget would be exceeded - Gracefully handles token limits with informative notes @@ -770,13 +810,16 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_ history_parts.append("Previous conversation turns:") - # Build conversation turns bottom-up (most recent first) but present chronologically - # This ensures we include as many recent turns as possible within the token budget - turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering + # === PHASE 1: COLLECTION (Newest-First for Token Budget) === + # Build conversation turns bottom-up (most recent first) to prioritize recent context within token limits + # This ensures we include as many recent turns as possible within the token budget by excluding + # OLDER turns first when space runs out, preserving the most contextually relevant exchanges + turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering later total_turn_tokens = 0 file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts) - # Process turns in reverse order (most recent first) to prioritize recent context + # CRITICAL: Process turns in REVERSE chronological order (newest to oldest) + # This prioritization strategy ensures recent context is preserved when token budget is tight for idx in range(len(all_turns) - 1, -1, -1): turn = all_turns[idx] turn_num = idx + 1 @@ -821,14 +864,19 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_ logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}") break - # Add this turn to our list (we'll reverse it later for chronological order) + # Add this turn to our collection (we'll reverse it later for chronological presentation) + # Store the original index to maintain proper turn numbering in final output turn_entries.append((idx, turn_content)) total_turn_tokens += turn_tokens - # Reverse to get chronological order (oldest first) + # === PHASE 2: PRESENTATION (Chronological for LLM Understanding) === + # Reverse 
-    # Reverse to get chronological order (oldest first)
+    # === PHASE 2: PRESENTATION (Chronological for LLM Understanding) ===
+    # Reverse the collected turns to restore chronological order (oldest first).
+    # This gives the LLM a natural conversation flow - Turn 1 → Turn 2 → Turn 3... -
+    # while recent turns were still prioritized during the token-constrained collection phase
     turn_entries.reverse()
 
-    # Add the turns in chronological order
+    # Add the turns in chronological order for natural LLM comprehension
+    # The LLM will see: "--- Turn 1 (Claude) ---" followed by "--- Turn 2 (Gemini) ---" etc.
     for _, turn_content in turn_entries:
         history_parts.append(turn_content)
diff --git a/utils/file_utils.py b/utils/file_utils.py
index 0d96a7c..f6d8033 100644
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -16,6 +16,25 @@ Security Model:
 - All file access is restricted to PROJECT_ROOT and its subdirectories
 - Absolute paths are required to prevent ambiguity
 - Symbolic links are resolved to ensure they stay within bounds
+
+CONVERSATION MEMORY INTEGRATION:
+This module works with the conversation memory system to support efficient
+multi-turn file handling:
+
+1. DEDUPLICATION SUPPORT:
+   - File reading functions are called by conversation-aware tools
+   - Accurate token estimation supports newest-first file prioritization
+   - Enables efficient file content caching and token budget management
+
+2. TOKEN BUDGET OPTIMIZATION:
+   - Provides accurate token estimation for file content before reading
+   - Supports the dual prioritization strategy by enabling precise budget calculations
+   - Enables tools to make informed decisions about which files to include
+
+3. CROSS-TOOL FILE PERSISTENCE:
+   - File reading results are used across different tools in conversation chains
+   - Consistent file access patterns support conversation continuation scenarios
+   - Error handling preserves conversation flow when files become unavailable
 """
 
 import json
diff --git a/utils/model_context.py b/utils/model_context.py
index 1055172..3855a59 100644
--- a/utils/model_context.py
+++ b/utils/model_context.py
@@ -4,6 +4,26 @@ Model context management for dynamic token allocation.
 This module provides a clean abstraction for model-specific token management,
 ensuring that token limits are properly calculated based on the current model
 being used, not global constants.
+
+CONVERSATION MEMORY INTEGRATION:
+This module works closely with the conversation memory system to provide
+optimal token allocation for multi-turn conversations:
+
+1. DUAL PRIORITIZATION STRATEGY SUPPORT:
+   - Provides separate token budgets for conversation history vs. files
+   - Enables the conversation memory system to apply newest-first prioritization
+   - Ensures an optimal balance between context preservation and new content
+
+2. MODEL-SPECIFIC ALLOCATION:
+   - Dynamic allocation based on model capabilities (context window size)
+   - Conservative allocation for smaller models (O3: 200K context)
+   - Generous allocation for larger models (Gemini: 1M+ context)
+   - Adapts token distribution ratios based on model capacity
+
+3. CROSS-TOOL CONSISTENCY:
+   - Provides consistent token budgets across different tools
+   - Enables seamless conversation continuation between tools
+   - Supports conversation reconstruction with proper budget management
 """
 
 import logging
@@ -64,13 +84,31 @@ class ModelContext:
 
     def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
         """
-        Calculate token allocation based on model capacity.
+        Calculate token allocation based on model capacity and conversation requirements.
+
+        This method implements the core token budget calculation that supports the
+        dual prioritization strategy used in conversation memory and file processing:
+
+        TOKEN ALLOCATION STRATEGY:
+        1. CONTENT vs RESPONSE SPLIT:
+           - Smaller models (< 300K context): 60% content, 40% response (conservative)
+           - Larger models (≥ 300K context): 80% content, 20% response (generous)
+
+        2. CONTENT SUB-ALLOCATION:
+           - File tokens: 30-40% of the content budget, for the newest file versions
+           - History tokens: 40-50% of the content budget, for conversation context
+           - Remainder: available for tool-specific prompt content
+
+        3. CONVERSATION MEMORY INTEGRATION:
+           - The history allocation enables conversation reconstruction in reconstruct_thread_context()
+           - The file allocation supports newest-first file prioritization in tools
+           - The remaining budget is passed to tools via the _remaining_tokens parameter
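+
+        Worked example (illustrative, using the ratios above): a model with a
+        1,000,000-token context window gets ~800K tokens for content; of that,
+        ~240K-320K tokens go to files and ~320K-400K tokens to conversation
+        history, with the remainder available for the prompt itself.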
 
         Args:
             reserved_for_response: Override response token reservation
 
         Returns:
-            TokenAllocation with calculated budgets
+            TokenAllocation with calculated budgets for the dual prioritization strategy
         """
         total_tokens = self.capabilities.context_window