Rebranding, refactoring, renaming, cleanup, updated docs
@@ -1,5 +1,5 @@
 """
-Utility functions for Gemini MCP Server
+Utility functions for Zen MCP Server
 """

 from .file_utils import CODE_EXTENSIONS, expand_paths, read_file_content, read_files
@@ -312,41 +312,41 @@ def add_turn(
def get_thread_chain(thread_id: str, max_depth: int = 20) -> list[ThreadContext]:
    """
    Traverse the parent chain to get all threads in conversation sequence.

    Retrieves the complete conversation chain by following parent_thread_id
    links. Returns threads in chronological order (oldest first).

    Args:
        thread_id: Starting thread ID
        max_depth: Maximum chain depth to prevent infinite loops

    Returns:
        list[ThreadContext]: All threads in chain, oldest first
    """
    chain = []
    current_id = thread_id
    seen_ids = set()

    # Build chain from current to oldest
    while current_id and len(chain) < max_depth:
        # Prevent circular references
        if current_id in seen_ids:
            logger.warning(f"[THREAD] Circular reference detected in thread chain at {current_id}")
            break

        seen_ids.add(current_id)

        context = get_thread(current_id)
        if not context:
            logger.debug(f"[THREAD] Thread {current_id} not found in chain traversal")
            break

        chain.append(context)
        current_id = context.parent_thread_id

    # Reverse to get chronological order (oldest first)
    chain.reverse()

    logger.debug(f"[THREAD] Retrieved chain of {len(chain)} threads for {thread_id}")
    return chain
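For illustration, a minimal self-contained sketch of the same parent-chain walk against an in-memory thread store; the FakeThread type and THREADS dict below are hypothetical stand-ins for the real get_thread() backend, not part of this commit:

from dataclasses import dataclass, field
from typing import Optional


@dataclass
class FakeThread:
    thread_id: str
    parent_thread_id: Optional[str] = None
    turns: list = field(default_factory=list)


# Hypothetical in-memory store standing in for the real thread storage
THREADS = {
    "t3": FakeThread("t3", parent_thread_id="t2"),
    "t2": FakeThread("t2", parent_thread_id="t1"),
    "t1": FakeThread("t1"),
}


def get_thread_chain_demo(thread_id: str, max_depth: int = 20) -> list[FakeThread]:
    chain, seen = [], set()
    current_id = thread_id
    while current_id and len(chain) < max_depth:
        if current_id in seen:  # guard against circular parent links
            break
        seen.add(current_id)
        context = THREADS.get(current_id)
        if not context:  # a missing parent ends the walk
            break
        chain.append(context)
        current_id = context.parent_thread_id
    chain.reverse()  # oldest thread first
    return chain


assert [t.thread_id for t in get_thread_chain_demo("t3")] == ["t1", "t2", "t3"]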
@@ -400,7 +400,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
    full file contents from all referenced files. Files are embedded only ONCE at the
    start, even if referenced in multiple turns, to prevent duplication and optimize
    token usage.

    If the thread has a parent chain, this function traverses the entire chain to
    include the complete conversation history.
@@ -429,21 +429,21 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
    if context.parent_thread_id:
        # This thread has a parent, get the full chain
        chain = get_thread_chain(context.thread_id)

        # Collect all turns from all threads in chain
        all_turns = []
        all_files_set = set()
        total_turns = 0

        for thread in chain:
            all_turns.extend(thread.turns)
            total_turns += len(thread.turns)

            # Collect files from this thread
            for turn in thread.turns:
                if turn.files:
                    all_files_set.update(turn.files)

        all_files = list(all_files_set)
        logger.debug(f"[THREAD] Built history from {len(chain)} threads with {total_turns} total turns")
    else:
@@ -451,7 +451,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
        all_turns = context.turns
        total_turns = len(context.turns)
        all_files = get_conversation_file_list(context)

    if not all_turns:
        return "", 0
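A small sketch of the file-collection idea in the hunk above: files referenced in any turn are gathered into a set so each file is embedded only once; the turn objects here are hypothetical stand-ins, not the real ConversationTurn model:

from types import SimpleNamespace

# Hypothetical turns; only the .files attribute matters for this sketch
turns = [
    SimpleNamespace(files=["main.py", "utils.py"]),
    SimpleNamespace(files=None),
    SimpleNamespace(files=["utils.py", "config.py"]),  # utils.py repeats
]

all_files_set = set()
for turn in turns:
    if turn.files:
        all_files_set.update(turn.files)

all_files = list(all_files_set)  # each file appears once, ready to embed a single time
print(sorted(all_files))  # ['config.py', 'main.py', 'utils.py']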
@@ -459,18 +459,19 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_

    # Get model-specific token allocation early (needed for both files and turns)
    if model_context is None:
-        from utils.model_context import ModelContext
+        from config import DEFAULT_MODEL
+        from utils.model_context import ModelContext

        model_context = ModelContext(DEFAULT_MODEL)

    token_allocation = model_context.calculate_token_allocation()
    max_file_tokens = token_allocation.file_tokens
    max_history_tokens = token_allocation.history_tokens

    logger.debug(f"[HISTORY] Using model-specific limits for {model_context.model_name}:")
    logger.debug(f"[HISTORY] Max file tokens: {max_file_tokens:,}")
    logger.debug(f"[HISTORY] Max history tokens: {max_history_tokens:,}")

    history_parts = [
        "=== CONVERSATION HISTORY ===",
        f"Thread: {context.thread_id}",
@@ -584,13 +585,13 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
        )

    history_parts.append("Previous conversation turns:")

    # Build conversation turns bottom-up (most recent first) but present chronologically
    # This ensures we include as many recent turns as possible within the token budget
    turn_entries = []  # Will store (index, formatted_turn_content) for chronological ordering
    total_turn_tokens = 0
    file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts)

    # Process turns in reverse order (most recent first) to prioritize recent context
    for idx in range(len(all_turns) - 1, -1, -1):
        turn = all_turns[idx]
@@ -599,16 +600,16 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_

        # Build the complete turn content
        turn_parts = []

        # Add turn header with tool attribution for cross-tool tracking
        turn_header = f"\n--- Turn {turn_num} ({role_label}"
        if turn.tool_name:
            turn_header += f" using {turn.tool_name}"

        # Add model info if available
        if turn.model_provider and turn.model_name:
            turn_header += f" via {turn.model_provider}/{turn.model_name}"

        turn_header += ") ---"
        turn_parts.append(turn_header)
@@ -624,11 +625,11 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
        # Add follow-up question if present
        if turn.follow_up_question:
            turn_parts.append(f"\n[Gemini's Follow-up: {turn.follow_up_question}]")

        # Calculate tokens for this turn
        turn_content = "\n".join(turn_parts)
        turn_tokens = model_context.estimate_tokens(turn_content)

        # Check if adding this turn would exceed history budget
        if file_embedding_tokens + total_turn_tokens + turn_tokens > max_history_tokens:
            # Stop adding turns - we've reached the limit
@@ -639,18 +640,18 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
            logger.debug(f"[HISTORY] Would total: {file_embedding_tokens + total_turn_tokens + turn_tokens:,}")
            logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}")
            break

        # Add this turn to our list (we'll reverse it later for chronological order)
        turn_entries.append((idx, turn_content))
        total_turn_tokens += turn_tokens

    # Reverse to get chronological order (oldest first)
    turn_entries.reverse()

    # Add the turns in chronological order
    for _, turn_content in turn_entries:
        history_parts.append(turn_content)

    # Log what we included
    included_turns = len(turn_entries)
    total_turns = len(all_turns)
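The newest-first selection under a token budget can be shown with a toy estimator; the 1-token-per-4-characters figure and the sample turns below are illustrative assumptions, not values from this commit:

def estimate_tokens(text: str) -> int:
    return len(text) // 4  # toy estimate for the sketch


all_turns = [f"turn {i}: " + "x" * 400 for i in range(10)]  # oldest .. newest
max_history_tokens = 350

turn_entries, total = [], 0
# Walk newest-first so recent context wins when the budget runs out
for idx in range(len(all_turns) - 1, -1, -1):
    tokens = estimate_tokens(all_turns[idx])
    if total + tokens > max_history_tokens:
        break
    turn_entries.append((idx, all_turns[idx]))
    total += tokens

turn_entries.reverse()  # present the kept turns chronologically
print([idx for idx, _ in turn_entries])  # [7, 8, 9] - only the most recent turns fit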
@@ -6,12 +6,12 @@ ensuring that token limits are properly calculated based on the current model
 being used, not global constants.
 """

-from typing import Optional, Dict, Any
-from dataclasses import dataclass
 import logging
+from dataclasses import dataclass
+from typing import Any, Optional

-from providers import ModelProviderRegistry, ModelCapabilities
 from config import DEFAULT_MODEL
+from providers import ModelCapabilities, ModelProviderRegistry

 logger = logging.getLogger(__name__)
@@ -19,12 +19,13 @@ logger = logging.getLogger(__name__)
@dataclass
class TokenAllocation:
    """Token allocation strategy for a model."""

    total_tokens: int
    content_tokens: int
    response_tokens: int
    file_tokens: int
    history_tokens: int

    @property
    def available_for_prompt(self) -> int:
        """Tokens available for the actual prompt after allocations."""
@@ -34,17 +35,17 @@ class TokenAllocation:
class ModelContext:
    """
    Encapsulates model-specific information and token calculations.

    This class provides a single source of truth for all model-related
    token calculations, ensuring consistency across the system.
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        self._provider = None
        self._capabilities = None
        self._token_allocation = None

    @property
    def provider(self):
        """Get the model provider lazily."""
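The provider and capabilities properties follow a lazy-cache pattern: resolve on first access, reuse afterwards. A generic sketch of that pattern under assumed names (the _lookup_provider helper below is a hypothetical stand-in for the real ModelProviderRegistry lookup):

class LazyExample:
    def __init__(self, model_name: str):
        self.model_name = model_name
        self._provider = None

    @property
    def provider(self):
        # First access resolves the provider; later accesses reuse the cached value
        if self._provider is None:
            self._provider = self._lookup_provider(self.model_name)
            if self._provider is None:
                raise ValueError(f"No provider found for model: {self.model_name}")
        return self._provider

    @staticmethod
    def _lookup_provider(name: str):
        # Hypothetical lookup standing in for a registry call
        return {"demo-model": object()}.get(name)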
@@ -53,78 +54,78 @@ class ModelContext:
        if not self._provider:
            raise ValueError(f"No provider found for model: {self.model_name}")
        return self._provider

    @property
    def capabilities(self) -> ModelCapabilities:
        """Get model capabilities lazily."""
        if self._capabilities is None:
            self._capabilities = self.provider.get_capabilities(self.model_name)
        return self._capabilities

    def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
        """
        Calculate token allocation based on model capacity.

        Args:
            reserved_for_response: Override response token reservation

        Returns:
            TokenAllocation with calculated budgets
        """
        total_tokens = self.capabilities.max_tokens

        # Dynamic allocation based on model capacity
        if total_tokens < 300_000:
-            # Smaller context models (O3, GPT-4O): Conservative allocation
+            # Smaller context models (O3): Conservative allocation
            content_ratio = 0.6  # 60% for content
            response_ratio = 0.4  # 40% for response
            file_ratio = 0.3  # 30% of content for files
            history_ratio = 0.5  # 50% of content for history
        else:
            # Larger context models (Gemini): More generous allocation
            content_ratio = 0.8  # 80% for content
            response_ratio = 0.2  # 20% for response
            file_ratio = 0.4  # 40% of content for files
            history_ratio = 0.4  # 40% of content for history

        # Calculate allocations
        content_tokens = int(total_tokens * content_ratio)
        response_tokens = reserved_for_response or int(total_tokens * response_ratio)

        # Sub-allocations within content budget
        file_tokens = int(content_tokens * file_ratio)
        history_tokens = int(content_tokens * history_ratio)

        allocation = TokenAllocation(
            total_tokens=total_tokens,
            content_tokens=content_tokens,
            response_tokens=response_tokens,
            file_tokens=file_tokens,
-            history_tokens=history_tokens
+            history_tokens=history_tokens,
        )

        logger.debug(f"Token allocation for {self.model_name}:")
        logger.debug(f"  Total: {allocation.total_tokens:,}")
        logger.debug(f"  Content: {allocation.content_tokens:,} ({content_ratio:.0%})")
        logger.debug(f"  Response: {allocation.response_tokens:,} ({response_ratio:.0%})")
        logger.debug(f"  Files: {allocation.file_tokens:,} ({file_ratio:.0%} of content)")
        logger.debug(f"  History: {allocation.history_tokens:,} ({history_ratio:.0%} of content)")

        return allocation

    def estimate_tokens(self, text: str) -> int:
        """
        Estimate token count for text using model-specific tokenizer.

        For now, uses simple estimation. Can be enhanced with model-specific
        tokenizers (tiktoken for OpenAI, etc.) in the future.
        """
        # TODO: Integrate model-specific tokenizers
        # For now, use conservative estimation
        return len(text) // 3  # Conservative estimate

    @classmethod
-    def from_arguments(cls, arguments: Dict[str, Any]) -> "ModelContext":
+    def from_arguments(cls, arguments: dict[str, Any]) -> "ModelContext":
        """Create ModelContext from tool arguments."""
        model_name = arguments.get("model") or DEFAULT_MODEL
        return cls(model_name)
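As a worked example of the allocation arithmetic above, a hypothetical 200,000-token model falls in the small-context branch (0.6/0.4/0.3/0.5) and a hypothetical 1,000,000-token model in the generous branch (0.8/0.2/0.4/0.4); the model sizes are illustrative, not values from this commit:

def allocate(total_tokens: int) -> dict:
    # Same branch ratios as calculate_token_allocation above
    if total_tokens < 300_000:
        content_ratio, response_ratio, file_ratio, history_ratio = 0.6, 0.4, 0.3, 0.5
    else:
        content_ratio, response_ratio, file_ratio, history_ratio = 0.8, 0.2, 0.4, 0.4
    content = int(total_tokens * content_ratio)
    return {
        "content": content,
        "response": int(total_tokens * response_ratio),
        "files": int(content * file_ratio),
        "history": int(content * history_ratio),
    }


print(allocate(200_000))    # {'content': 120000, 'response': 80000, 'files': 36000, 'history': 60000}
print(allocate(1_000_000))  # {'content': 800000, 'response': 200000, 'files': 320000, 'history': 400000}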