Improved documentation for conversation / file collection strategy, context budget allocation, etc.
This commit is contained in:
131
server.py
131
server.py
@@ -364,20 +364,57 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
|
|||||||
"""
|
"""
|
||||||
Handle incoming tool execution requests from MCP clients.
|
Handle incoming tool execution requests from MCP clients.
|
||||||
|
|
||||||
This is the main request dispatcher that routes tool calls to their
|
This is the main request dispatcher that routes tool calls to their appropriate handlers.
|
||||||
appropriate handlers. It supports both AI-powered tools (from TOOLS registry)
|
It supports both AI-powered tools (from TOOLS registry) and utility tools (implemented as
|
||||||
and utility tools (implemented as static functions).
|
static functions).
|
||||||
|
|
||||||
Thread Context Reconstruction:
|
CONVERSATION LIFECYCLE MANAGEMENT:
|
||||||
If the request contains a continuation_id, this function reconstructs
|
This function serves as the central orchestrator for multi-turn AI-to-AI conversations:
|
||||||
the conversation history and injects it into the tool's context.
|
|
||||||
|
1. THREAD RESUMPTION: When continuation_id is present, it reconstructs complete conversation
|
||||||
|
context from Redis including conversation history and file references
|
||||||
|
|
||||||
|
2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze →
|
||||||
|
codereview → debug) while preserving full conversation context and file references
|
||||||
|
|
||||||
|
3. CONTEXT INJECTION: Reconstructed conversation history is embedded into tool prompts
|
||||||
|
using the dual prioritization strategy:
|
||||||
|
- Files: Newest-first prioritization (recent file versions take precedence)
|
||||||
|
- Turns: Newest-first collection for token efficiency, chronological presentation for LLM
|
||||||
|
|
||||||
|
4. FOLLOW-UP GENERATION: After tool execution, generates continuation offers for ongoing
|
||||||
|
AI-to-AI collaboration with natural language instructions
|
||||||
|
|
||||||
|
STATELESS TO STATEFUL BRIDGE:
|
||||||
|
The MCP protocol is inherently stateless, but this function bridges the gap by:
|
||||||
|
- Loading persistent conversation state from Redis
|
||||||
|
- Reconstructing full multi-turn context for tool execution
|
||||||
|
- Enabling tools to access previous exchanges and file references
|
||||||
|
- Supporting conversation chains across different tool types
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
name: The name of the tool to execute
|
name: The name of the tool to execute (e.g., "analyze", "chat", "codereview")
|
||||||
arguments: Dictionary of arguments to pass to the tool
|
arguments: Dictionary of arguments to pass to the tool, potentially including:
|
||||||
|
- continuation_id: UUID for conversation thread resumption
|
||||||
|
- files: File paths for analysis (subject to deduplication)
|
||||||
|
- prompt: User request or follow-up question
|
||||||
|
- model: Specific AI model to use (optional)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of TextContent objects containing the tool's response
|
List of TextContent objects containing:
|
||||||
|
- Tool's primary response with analysis/results
|
||||||
|
- Continuation offers for follow-up conversations (when applicable)
|
||||||
|
- Structured JSON responses with status and content
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If continuation_id is invalid or conversation thread not found
|
||||||
|
Exception: For tool-specific errors or execution failures
|
||||||
|
|
||||||
|
Example Conversation Flow:
|
||||||
|
1. Claude calls analyze tool with files → creates new thread
|
||||||
|
2. Thread ID returned in continuation offer
|
||||||
|
3. Claude continues with codereview tool + continuation_id → full context preserved
|
||||||
|
4. Multiple tools can collaborate using same thread ID
|
||||||
"""
|
"""
|
||||||
logger.info(f"MCP tool call: {name}")
|
logger.info(f"MCP tool call: {name}")
|
||||||
logger.debug(f"MCP tool arguments: {list(arguments.keys())}")
|
logger.debug(f"MCP tool arguments: {list(arguments.keys())}")
|
||||||
@@ -492,16 +529,82 @@ Remember: Only suggest follow-ups when they would genuinely add value to the dis
|
|||||||
|
|
||||||
async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any]:
|
async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Reconstruct conversation context for thread continuation.
|
Reconstruct conversation context for stateless-to-stateful thread continuation.
|
||||||
|
|
||||||
This function loads the conversation history from Redis and integrates it
|
This is a critical function that transforms the inherently stateless MCP protocol into
|
||||||
into the request arguments to provide full context to the tool.
|
stateful multi-turn conversations. It loads persistent conversation state from Redis
|
||||||
|
and rebuilds complete conversation context using the sophisticated dual prioritization
|
||||||
|
strategy implemented in the conversation memory system.
|
||||||
|
|
||||||
|
CONTEXT RECONSTRUCTION PROCESS:
|
||||||
|
|
||||||
|
1. THREAD RETRIEVAL: Loads complete ThreadContext from Redis using continuation_id
|
||||||
|
- Includes all conversation turns with tool attribution
|
||||||
|
- Preserves file references and cross-tool context
|
||||||
|
- Handles conversation chains across multiple linked threads
|
||||||
|
|
||||||
|
2. CONVERSATION HISTORY BUILDING: Uses build_conversation_history() to create
|
||||||
|
comprehensive context with intelligent prioritization:
|
||||||
|
|
||||||
|
FILE PRIORITIZATION (Newest-First Throughout):
|
||||||
|
- When same file appears in multiple turns, newest reference wins
|
||||||
|
- File embedding prioritizes recent versions, excludes older duplicates
|
||||||
|
- Token budget management ensures most relevant files are preserved
|
||||||
|
|
||||||
|
CONVERSATION TURN PRIORITIZATION (Dual Strategy):
|
||||||
|
- Collection Phase: Processes turns newest-to-oldest for token efficiency
|
||||||
|
- Presentation Phase: Presents turns chronologically for LLM understanding
|
||||||
|
- Ensures recent context is preserved when token budget is constrained
|
||||||
|
|
||||||
|
3. CONTEXT INJECTION: Embeds reconstructed history into tool request arguments
|
||||||
|
- Conversation history becomes part of the tool's prompt context
|
||||||
|
- Files referenced in previous turns are accessible to current tool
|
||||||
|
- Cross-tool knowledge transfer is seamless and comprehensive
|
||||||
|
|
||||||
|
4. TOKEN BUDGET MANAGEMENT: Applies model-specific token allocation
|
||||||
|
- Balances conversation history vs. file content vs. response space
|
||||||
|
- Gracefully handles token limits with intelligent exclusion strategies
|
||||||
|
- Preserves most contextually relevant information within constraints
|
||||||
|
|
||||||
|
CROSS-TOOL CONTINUATION SUPPORT:
|
||||||
|
This function enables seamless handoffs between different tools:
|
||||||
|
- Analyze tool → Debug tool: Full file context and analysis preserved
|
||||||
|
- Chat tool → CodeReview tool: Conversation context maintained
|
||||||
|
- Any tool → Any tool: Complete cross-tool knowledge transfer
|
||||||
|
|
||||||
|
ERROR HANDLING & RECOVERY:
|
||||||
|
- Thread expiration: Provides clear instructions for conversation restart
|
||||||
|
- Redis unavailability: Graceful degradation with error messaging
|
||||||
|
- Invalid continuation_id: Security validation and user-friendly errors
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
arguments: Original request arguments containing continuation_id
|
arguments: Original request arguments dictionary containing:
|
||||||
|
- continuation_id (required): UUID of conversation thread to resume
|
||||||
|
- Other tool-specific arguments that will be preserved
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Modified arguments with conversation history injected
|
dict[str, Any]: Enhanced arguments dictionary with conversation context:
|
||||||
|
- Original arguments preserved
|
||||||
|
- Conversation history embedded in appropriate format for tool consumption
|
||||||
|
- File context from previous turns made accessible
|
||||||
|
- Cross-tool knowledge transfer enabled
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: When continuation_id is invalid, thread not found, or expired
|
||||||
|
Includes user-friendly recovery instructions
|
||||||
|
|
||||||
|
Performance Characteristics:
|
||||||
|
- O(1) thread lookup in Redis
|
||||||
|
- O(n) conversation history reconstruction where n = number of turns
|
||||||
|
- Intelligent token budgeting prevents context window overflow
|
||||||
|
- Optimized file deduplication minimizes redundant content
|
||||||
|
|
||||||
|
Example Usage Flow:
|
||||||
|
1. Claude: "Continue analyzing the security issues" + continuation_id
|
||||||
|
2. reconstruct_thread_context() loads previous analyze conversation
|
||||||
|
3. Debug tool receives full context including previous file analysis
|
||||||
|
4. Debug tool can reference specific findings from analyze tool
|
||||||
|
5. Natural cross-tool collaboration without context loss
|
||||||
"""
|
"""
|
||||||
from utils.conversation_memory import add_turn, build_conversation_history, get_thread
|
from utils.conversation_memory import add_turn, build_conversation_history, get_thread
|
||||||
|
|
||||||
|
|||||||
@@ -93,6 +93,30 @@ class BaseTool(ABC):
|
|||||||
This class defines the interface that all tools must implement and provides
|
This class defines the interface that all tools must implement and provides
|
||||||
common functionality for request handling, model creation, and response formatting.
|
common functionality for request handling, model creation, and response formatting.
|
||||||
|
|
||||||
|
CONVERSATION-AWARE FILE PROCESSING:
|
||||||
|
This base class implements the sophisticated dual prioritization strategy for
|
||||||
|
conversation-aware file handling across all tools:
|
||||||
|
|
||||||
|
1. FILE DEDUPLICATION WITH NEWEST-FIRST PRIORITY:
|
||||||
|
- When same file appears in multiple conversation turns, newest reference wins
|
||||||
|
- Prevents redundant file embedding while preserving most recent file state
|
||||||
|
- Cross-tool file tracking ensures consistent behavior across analyze → codereview → debug
|
||||||
|
|
||||||
|
2. CONVERSATION CONTEXT INTEGRATION:
|
||||||
|
- All tools receive enhanced prompts with conversation history via reconstruct_thread_context()
|
||||||
|
- File references from previous turns are preserved and accessible
|
||||||
|
- Cross-tool knowledge transfer maintains full context without manual file re-specification
|
||||||
|
|
||||||
|
3. TOKEN-AWARE FILE EMBEDDING:
|
||||||
|
- Respects model-specific token allocation budgets from ModelContext
|
||||||
|
- Prioritizes conversation history, then newest files, then remaining content
|
||||||
|
- Graceful degradation when token limits are approached
|
||||||
|
|
||||||
|
4. STATELESS-TO-STATEFUL BRIDGING:
|
||||||
|
- Tools operate on stateless MCP requests but access full conversation state
|
||||||
|
- Conversation memory automatically injected via continuation_id parameter
|
||||||
|
- Enables natural AI-to-AI collaboration across tool boundaries
|
||||||
|
|
||||||
To create a new tool:
|
To create a new tool:
|
||||||
1. Create a new class that inherits from BaseTool
|
1. Create a new class that inherits from BaseTool
|
||||||
2. Implement all abstract methods
|
2. Implement all abstract methods
|
||||||
@@ -546,12 +570,33 @@ class BaseTool(ABC):
|
|||||||
arguments: Optional[dict] = None,
|
arguments: Optional[dict] = None,
|
||||||
) -> tuple[str, list[str]]:
|
) -> tuple[str, list[str]]:
|
||||||
"""
|
"""
|
||||||
Centralized file processing for tool prompts.
|
Centralized file processing implementing dual prioritization strategy.
|
||||||
|
|
||||||
This method handles the common pattern across all tools:
|
DUAL PRIORITIZATION STRATEGY CORE IMPLEMENTATION:
|
||||||
1. Filter out files already embedded in conversation history
|
This method is the heart of conversation-aware file processing across all tools:
|
||||||
2. Read content of only new files
|
|
||||||
3. Generate informative note about skipped files
|
1. CONVERSATION-AWARE FILE DEDUPLICATION:
|
||||||
|
- Automatically detects and filters files already embedded in conversation history
|
||||||
|
- Implements newest-first prioritization: when same file appears in multiple turns,
|
||||||
|
only the newest reference is preserved to avoid redundant content
|
||||||
|
- Cross-tool file tracking ensures consistent behavior across tool boundaries
|
||||||
|
|
||||||
|
2. TOKEN-BUDGET OPTIMIZATION:
|
||||||
|
- Respects remaining token budget from conversation context reconstruction
|
||||||
|
- Prioritizes conversation history + newest file versions within constraints
|
||||||
|
- Graceful degradation when token limits are approached (newest files preserved first)
|
||||||
|
- Model-specific token allocation ensures optimal context window utilization
|
||||||
|
|
||||||
|
3. CROSS-TOOL CONTINUATION SUPPORT:
|
||||||
|
- File references persist across different tools (analyze → codereview → debug)
|
||||||
|
- Previous tool file embeddings are tracked and excluded from new embeddings
|
||||||
|
- Maintains complete file context without manual re-specification
|
||||||
|
|
||||||
|
PROCESSING WORKFLOW:
|
||||||
|
1. Filter out files already embedded in conversation history using newest-first priority
|
||||||
|
2. Read content of only new files within remaining token budget
|
||||||
|
3. Generate informative notes about skipped files for user transparency
|
||||||
|
4. Return formatted content ready for prompt inclusion
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
request_files: List of files requested for current tool execution
|
request_files: List of files requested for current tool execution
|
||||||
|
|||||||
@@ -39,10 +39,12 @@ Key Features:
|
|||||||
- Thread-safe operations for concurrent access
|
- Thread-safe operations for concurrent access
|
||||||
- Graceful degradation when Redis is unavailable
|
- Graceful degradation when Redis is unavailable
|
||||||
|
|
||||||
FILE PRIORITIZATION STRATEGY:
|
DUAL PRIORITIZATION STRATEGY (Files & Conversations):
|
||||||
The conversation memory system implements a sophisticated file prioritization algorithm
|
The conversation memory system implements sophisticated prioritization for both files and
|
||||||
that ensures newer file references always take precedence over older ones:
|
conversation turns, using a consistent "newest-first" approach during collection but
|
||||||
|
presenting information in the optimal format for LLM consumption:
|
||||||
|
|
||||||
|
FILE PRIORITIZATION (Newest-First Throughout):
|
||||||
1. When collecting files across conversation turns, the system walks BACKWARDS through
|
1. When collecting files across conversation turns, the system walks BACKWARDS through
|
||||||
turns (newest to oldest) and builds a unique file list
|
turns (newest to oldest) and builds a unique file list
|
||||||
2. If the same file path appears in multiple turns, only the reference from the
|
2. If the same file path appears in multiple turns, only the reference from the
|
||||||
@@ -54,8 +56,16 @@ that ensures newer file references always take precedence over older ones:
|
|||||||
4. This strategy works across conversation chains - files from newer turns in ANY
|
4. This strategy works across conversation chains - files from newer turns in ANY
|
||||||
thread take precedence over files from older turns in ANY thread
|
thread take precedence over files from older turns in ANY thread
|
||||||
|
|
||||||
This approach ensures that when token limits force file exclusions, the most
|
CONVERSATION TURN PRIORITIZATION (Newest-First Collection, Chronological Presentation):
|
||||||
recently referenced and contextually relevant files are preserved.
|
1. COLLECTION PHASE: Processes turns newest-to-oldest to prioritize recent context
|
||||||
|
- When token budget is tight, OLDER turns are excluded first
|
||||||
|
- Ensures most contextually relevant recent exchanges are preserved
|
||||||
|
2. PRESENTATION PHASE: Reverses collected turns to chronological order (oldest-first)
|
||||||
|
- LLM sees natural conversation flow: "Turn 1 → Turn 2 → Turn 3..."
|
||||||
|
- Maintains proper sequential understanding while preserving recency prioritization
|
||||||
|
|
||||||
|
This dual approach ensures optimal context preservation (newest-first) with natural
|
||||||
|
conversation flow (chronological) for maximum LLM comprehension and relevance.
|
||||||
|
|
||||||
USAGE EXAMPLE:
|
USAGE EXAMPLE:
|
||||||
1. Tool A creates thread: create_thread("analyze", request_data) → returns UUID
|
1. Tool A creates thread: create_thread("analyze", request_data) → returns UUID
|
||||||
@@ -64,7 +74,20 @@ USAGE EXAMPLE:
|
|||||||
4. Tool B sees conversation history via build_conversation_history()
|
4. Tool B sees conversation history via build_conversation_history()
|
||||||
5. Tool B adds its response: add_turn(UUID, "assistant", response, tool_name="codereview")
|
5. Tool B adds its response: add_turn(UUID, "assistant", response, tool_name="codereview")
|
||||||
|
|
||||||
This enables true AI-to-AI collaboration across the entire tool ecosystem.
|
DUAL STRATEGY EXAMPLE:
|
||||||
|
Conversation has 5 turns, token budget allows only 3 turns:
|
||||||
|
|
||||||
|
Collection Phase (Newest-First Priority):
|
||||||
|
- Evaluates: Turn 5 → Turn 4 → Turn 3 → Turn 2 → Turn 1
|
||||||
|
- Includes: Turn 5, Turn 4, Turn 3 (newest 3 fit in budget)
|
||||||
|
- Excludes: Turn 2, Turn 1 (oldest, dropped due to token limits)
|
||||||
|
|
||||||
|
Presentation Phase (Chronological Order):
|
||||||
|
- LLM sees: "--- Turn 3 (Claude) ---", "--- Turn 4 (Gemini) ---", "--- Turn 5 (Claude) ---"
|
||||||
|
- Natural conversation flow maintained despite prioritizing recent context
|
||||||
|
|
||||||
|
This enables true AI-to-AI collaboration across the entire tool ecosystem with optimal
|
||||||
|
context preservation and natural conversation understanding.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
@@ -543,10 +566,27 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
|
|||||||
to include complete conversation history across multiple linked threads. File
|
to include complete conversation history across multiple linked threads. File
|
||||||
prioritization works across the entire chain, not just the current thread.
|
prioritization works across the entire chain, not just the current thread.
|
||||||
|
|
||||||
|
CONVERSATION TURN ORDERING STRATEGY:
|
||||||
|
The function employs a sophisticated two-phase approach for optimal token utilization:
|
||||||
|
|
||||||
|
PHASE 1 - COLLECTION (Newest-First for Token Budget):
|
||||||
|
- Processes conversation turns in REVERSE chronological order (newest to oldest)
|
||||||
|
- Prioritizes recent turns within token constraints
|
||||||
|
- If token budget is exceeded, OLDER turns are excluded first
|
||||||
|
- Ensures the most contextually relevant recent exchanges are preserved
|
||||||
|
|
||||||
|
PHASE 2 - PRESENTATION (Chronological for LLM Understanding):
|
||||||
|
- Reverses the collected turns back to chronological order (oldest to newest)
|
||||||
|
- Presents conversation flow naturally for LLM comprehension
|
||||||
|
- Maintains "--- Turn 1, Turn 2, Turn 3..." sequential numbering
|
||||||
|
- Enables LLM to follow conversation progression logically
|
||||||
|
|
||||||
|
This approach balances recency prioritization with natural conversation flow.
|
||||||
|
|
||||||
TOKEN MANAGEMENT:
|
TOKEN MANAGEMENT:
|
||||||
- Uses model-specific token allocation (file_tokens + history_tokens)
|
- Uses model-specific token allocation (file_tokens + history_tokens)
|
||||||
- Files are embedded ONCE at the start to prevent duplication
|
- Files are embedded ONCE at the start to prevent duplication
|
||||||
- Conversation turns are processed newest-first but presented chronologically
|
- Turns are collected newest-first but presented in chronological order
|
||||||
- Stops adding turns when token budget would be exceeded
|
- Stops adding turns when token budget would be exceeded
|
||||||
- Gracefully handles token limits with informative notes
|
- Gracefully handles token limits with informative notes
|
||||||
|
|
||||||
@@ -770,13 +810,16 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
|
|||||||
|
|
||||||
history_parts.append("Previous conversation turns:")
|
history_parts.append("Previous conversation turns:")
|
||||||
|
|
||||||
# Build conversation turns bottom-up (most recent first) but present chronologically
|
# === PHASE 1: COLLECTION (Newest-First for Token Budget) ===
|
||||||
# This ensures we include as many recent turns as possible within the token budget
|
# Build conversation turns bottom-up (most recent first) to prioritize recent context within token limits
|
||||||
turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering
|
# This ensures we include as many recent turns as possible within the token budget by excluding
|
||||||
|
# OLDER turns first when space runs out, preserving the most contextually relevant exchanges
|
||||||
|
turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering later
|
||||||
total_turn_tokens = 0
|
total_turn_tokens = 0
|
||||||
file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts)
|
file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts)
|
||||||
|
|
||||||
# Process turns in reverse order (most recent first) to prioritize recent context
|
# CRITICAL: Process turns in REVERSE chronological order (newest to oldest)
|
||||||
|
# This prioritization strategy ensures recent context is preserved when token budget is tight
|
||||||
for idx in range(len(all_turns) - 1, -1, -1):
|
for idx in range(len(all_turns) - 1, -1, -1):
|
||||||
turn = all_turns[idx]
|
turn = all_turns[idx]
|
||||||
turn_num = idx + 1
|
turn_num = idx + 1
|
||||||
@@ -821,14 +864,19 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
|
|||||||
logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}")
|
logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}")
|
||||||
break
|
break
|
||||||
|
|
||||||
# Add this turn to our list (we'll reverse it later for chronological order)
|
# Add this turn to our collection (we'll reverse it later for chronological presentation)
|
||||||
|
# Store the original index to maintain proper turn numbering in final output
|
||||||
turn_entries.append((idx, turn_content))
|
turn_entries.append((idx, turn_content))
|
||||||
total_turn_tokens += turn_tokens
|
total_turn_tokens += turn_tokens
|
||||||
|
|
||||||
# Reverse to get chronological order (oldest first)
|
# === PHASE 2: PRESENTATION (Chronological for LLM Understanding) ===
|
||||||
|
# Reverse the collected turns to restore chronological order (oldest first)
|
||||||
|
# This gives the LLM a natural conversation flow: Turn 1 → Turn 2 → Turn 3...
|
||||||
|
# while still having prioritized recent turns during the token-constrained collection phase
|
||||||
turn_entries.reverse()
|
turn_entries.reverse()
|
||||||
|
|
||||||
# Add the turns in chronological order
|
# Add the turns in chronological order for natural LLM comprehension
|
||||||
|
# The LLM will see: "--- Turn 1 (Claude) ---" followed by "--- Turn 2 (Gemini) ---" etc.
|
||||||
for _, turn_content in turn_entries:
|
for _, turn_content in turn_entries:
|
||||||
history_parts.append(turn_content)
|
history_parts.append(turn_content)
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,25 @@ Security Model:
|
|||||||
- All file access is restricted to PROJECT_ROOT and its subdirectories
|
- All file access is restricted to PROJECT_ROOT and its subdirectories
|
||||||
- Absolute paths are required to prevent ambiguity
|
- Absolute paths are required to prevent ambiguity
|
||||||
- Symbolic links are resolved to ensure they stay within bounds
|
- Symbolic links are resolved to ensure they stay within bounds
|
||||||
|
|
||||||
|
CONVERSATION MEMORY INTEGRATION:
|
||||||
|
This module works with the conversation memory system to support efficient
|
||||||
|
multi-turn file handling:
|
||||||
|
|
||||||
|
1. DEDUPLICATION SUPPORT:
|
||||||
|
- File reading functions are called by conversation-aware tools
|
||||||
|
- Supports newest-first file prioritization by providing accurate token estimation
|
||||||
|
- Enables efficient file content caching and token budget management
|
||||||
|
|
||||||
|
2. TOKEN BUDGET OPTIMIZATION:
|
||||||
|
- Provides accurate token estimation for file content before reading
|
||||||
|
- Supports the dual prioritization strategy by enabling precise budget calculations
|
||||||
|
- Enables tools to make informed decisions about which files to include
|
||||||
|
|
||||||
|
3. CROSS-TOOL FILE PERSISTENCE:
|
||||||
|
- File reading results are used across different tools in conversation chains
|
||||||
|
- Consistent file access patterns support conversation continuation scenarios
|
||||||
|
- Error handling preserves conversation flow when files become unavailable
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|||||||
@@ -4,6 +4,26 @@ Model context management for dynamic token allocation.
|
|||||||
This module provides a clean abstraction for model-specific token management,
|
This module provides a clean abstraction for model-specific token management,
|
||||||
ensuring that token limits are properly calculated based on the current model
|
ensuring that token limits are properly calculated based on the current model
|
||||||
being used, not global constants.
|
being used, not global constants.
|
||||||
|
|
||||||
|
CONVERSATION MEMORY INTEGRATION:
|
||||||
|
This module works closely with the conversation memory system to provide
|
||||||
|
optimal token allocation for multi-turn conversations:
|
||||||
|
|
||||||
|
1. DUAL PRIORITIZATION STRATEGY SUPPORT:
|
||||||
|
- Provides separate token budgets for conversation history vs. files
|
||||||
|
- Enables the conversation memory system to apply newest-first prioritization
|
||||||
|
- Ensures optimal balance between context preservation and new content
|
||||||
|
|
||||||
|
2. MODEL-SPECIFIC ALLOCATION:
|
||||||
|
- Dynamic allocation based on model capabilities (context window size)
|
||||||
|
- Conservative allocation for smaller models (O3: 200K context)
|
||||||
|
- Generous allocation for larger models (Gemini: 1M+ context)
|
||||||
|
- Adapts token distribution ratios based on model capacity
|
||||||
|
|
||||||
|
3. CROSS-TOOL CONSISTENCY:
|
||||||
|
- Provides consistent token budgets across different tools
|
||||||
|
- Enables seamless conversation continuation between tools
|
||||||
|
- Supports conversation reconstruction with proper budget management
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
@@ -64,13 +84,31 @@ class ModelContext:
|
|||||||
|
|
||||||
def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
|
def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
|
||||||
"""
|
"""
|
||||||
Calculate token allocation based on model capacity.
|
Calculate token allocation based on model capacity and conversation requirements.
|
||||||
|
|
||||||
|
This method implements the core token budget calculation that supports the
|
||||||
|
dual prioritization strategy used in conversation memory and file processing:
|
||||||
|
|
||||||
|
TOKEN ALLOCATION STRATEGY:
|
||||||
|
1. CONTENT vs RESPONSE SPLIT:
|
||||||
|
- Smaller models (< 300K): 60% content, 40% response (conservative)
|
||||||
|
- Larger models (≥ 300K): 80% content, 20% response (generous)
|
||||||
|
|
||||||
|
2. CONTENT SUB-ALLOCATION:
|
||||||
|
- File tokens: 30-40% of content budget for newest file versions
|
||||||
|
- History tokens: 40-50% of content budget for conversation context
|
||||||
|
- Remaining: Available for tool-specific prompt content
|
||||||
|
|
||||||
|
3. CONVERSATION MEMORY INTEGRATION:
|
||||||
|
- History allocation enables conversation reconstruction in reconstruct_thread_context()
|
||||||
|
- File allocation supports newest-first file prioritization in tools
|
||||||
|
- Remaining budget passed to tools via _remaining_tokens parameter
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
reserved_for_response: Override response token reservation
|
reserved_for_response: Override response token reservation
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
TokenAllocation with calculated budgets
|
TokenAllocation with calculated budgets for dual prioritization strategy
|
||||||
"""
|
"""
|
||||||
total_tokens = self.capabilities.context_window
|
total_tokens = self.capabilities.context_window
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user