Rebranding, refactoring, renaming, cleanup, updated docs

Fahad
2025-06-12 10:40:43 +04:00
parent 9a55ca8898
commit fb66825bf6
55 changed files with 1048 additions and 1474 deletions

View File

@@ -1,5 +1,5 @@
"""
-Utility functions for Gemini MCP Server
+Utility functions for Zen MCP Server
"""
from .file_utils import CODE_EXTENSIONS, expand_paths, read_file_content, read_files

View File

@@ -312,41 +312,41 @@ def add_turn(
def get_thread_chain(thread_id: str, max_depth: int = 20) -> list[ThreadContext]:
"""
Traverse the parent chain to get all threads in conversation sequence.
Retrieves the complete conversation chain by following parent_thread_id
links. Returns threads in chronological order (oldest first).
Args:
thread_id: Starting thread ID
max_depth: Maximum chain depth to prevent infinite loops
Returns:
list[ThreadContext]: All threads in chain, oldest first
"""
chain = []
current_id = thread_id
seen_ids = set()
# Build chain from current to oldest
while current_id and len(chain) < max_depth:
# Prevent circular references
if current_id in seen_ids:
logger.warning(f"[THREAD] Circular reference detected in thread chain at {current_id}")
break
seen_ids.add(current_id)
context = get_thread(current_id)
if not context:
logger.debug(f"[THREAD] Thread {current_id} not found in chain traversal")
break
chain.append(context)
current_id = context.parent_thread_id
# Reverse to get chronological order (oldest first)
chain.reverse()
logger.debug(f"[THREAD] Retrieved chain of {len(chain)} threads for {thread_id}")
return chain
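# --- Example (not part of this commit): the same parent-chain traversal pattern
# in a minimal, self-contained form. SimpleThread, THREADS and get_chain are
# hypothetical stand-ins for ThreadContext, the get_thread() lookup and
# get_thread_chain(); only the cycle guard / depth limit / reverse logic is real.
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class SimpleThread:
    thread_id: str
    parent_thread_id: Optional[str] = None
    turns: list = field(default_factory=list)

THREADS = {
    "t1": SimpleThread("t1"),
    "t2": SimpleThread("t2", parent_thread_id="t1"),
    "t3": SimpleThread("t3", parent_thread_id="t2"),
}

def get_chain(thread_id: str, max_depth: int = 20) -> list[SimpleThread]:
    chain, seen = [], set()
    current_id = thread_id
    while current_id and len(chain) < max_depth:
        if current_id in seen:       # guard against circular parent links
            break
        seen.add(current_id)
        context = THREADS.get(current_id)
        if context is None:          # broken link: keep what we have so far
            break
        chain.append(context)
        current_id = context.parent_thread_id
    chain.reverse()                  # chronological order, oldest thread first
    return chain

print([t.thread_id for t in get_chain("t3")])  # ['t1', 't2', 't3']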
@@ -400,7 +400,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
full file contents from all referenced files. Files are embedded only ONCE at the
start, even if referenced in multiple turns, to prevent duplication and optimize
token usage.
If the thread has a parent chain, this function traverses the entire chain to
include the complete conversation history.
@@ -429,21 +429,21 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
if context.parent_thread_id:
# This thread has a parent, get the full chain
chain = get_thread_chain(context.thread_id)
# Collect all turns from all threads in chain
all_turns = []
all_files_set = set()
total_turns = 0
for thread in chain:
all_turns.extend(thread.turns)
total_turns += len(thread.turns)
# Collect files from this thread
for turn in thread.turns:
if turn.files:
all_files_set.update(turn.files)
all_files = list(all_files_set)
logger.debug(f"[THREAD] Built history from {len(chain)} threads with {total_turns} total turns")
else:
@@ -451,7 +451,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
all_turns = context.turns
total_turns = len(context.turns)
all_files = get_conversation_file_list(context)
if not all_turns:
return "", 0
@@ -459,18 +459,19 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
# Get model-specific token allocation early (needed for both files and turns)
if model_context is None:
-from utils.model_context import ModelContext
+from config import DEFAULT_MODEL
+from utils.model_context import ModelContext
model_context = ModelContext(DEFAULT_MODEL)
token_allocation = model_context.calculate_token_allocation()
max_file_tokens = token_allocation.file_tokens
max_history_tokens = token_allocation.history_tokens
logger.debug(f"[HISTORY] Using model-specific limits for {model_context.model_name}:")
logger.debug(f"[HISTORY] Max file tokens: {max_file_tokens:,}")
logger.debug(f"[HISTORY] Max history tokens: {max_history_tokens:,}")
history_parts = [
"=== CONVERSATION HISTORY ===",
f"Thread: {context.thread_id}",
@@ -584,13 +585,13 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
)
history_parts.append("Previous conversation turns:")
# Build conversation turns bottom-up (most recent first) but present chronologically
# This ensures we include as many recent turns as possible within the token budget
turn_entries = [] # Will store (index, formatted_turn_content) for chronological ordering
total_turn_tokens = 0
file_embedding_tokens = sum(model_context.estimate_tokens(part) for part in history_parts)
# Process turns in reverse order (most recent first) to prioritize recent context
for idx in range(len(all_turns) - 1, -1, -1):
turn = all_turns[idx]
@@ -599,16 +600,16 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
# Build the complete turn content
turn_parts = []
# Add turn header with tool attribution for cross-tool tracking
turn_header = f"\n--- Turn {turn_num} ({role_label}"
if turn.tool_name:
turn_header += f" using {turn.tool_name}"
# Add model info if available
if turn.model_provider and turn.model_name:
turn_header += f" via {turn.model_provider}/{turn.model_name}"
turn_header += ") ---"
turn_parts.append(turn_header)
@@ -624,11 +625,11 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
# Add follow-up question if present
if turn.follow_up_question:
turn_parts.append(f"\n[Gemini's Follow-up: {turn.follow_up_question}]")
# Calculate tokens for this turn
turn_content = "\n".join(turn_parts)
turn_tokens = model_context.estimate_tokens(turn_content)
# Check if adding this turn would exceed history budget
if file_embedding_tokens + total_turn_tokens + turn_tokens > max_history_tokens:
# Stop adding turns - we've reached the limit
@@ -639,18 +640,18 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
logger.debug(f"[HISTORY] Would total: {file_embedding_tokens + total_turn_tokens + turn_tokens:,}")
logger.debug(f"[HISTORY] Budget: {max_history_tokens:,}")
break
# Add this turn to our list (we'll reverse it later for chronological order)
turn_entries.append((idx, turn_content))
total_turn_tokens += turn_tokens
# Reverse to get chronological order (oldest first)
turn_entries.reverse()
# Add the turns in chronological order
for _, turn_content in turn_entries:
history_parts.append(turn_content)
# Log what we included
included_turns = len(turn_entries)
total_turns = len(all_turns)
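# --- Example (not part of this commit): a toy reduction of the turn-budgeting
# strategy above. Turns are walked newest-first, dropped once the history budget
# would be exceeded, then reversed so the kept turns read chronologically.
# select_turns and the sample strings are illustrative; the len(text) // 3
# heuristic mirrors estimate_tokens in utils/model_context below.
def estimate_tokens(text: str) -> int:
    return len(text) // 3

def select_turns(turns: list[str], max_history_tokens: int) -> list[str]:
    kept, used = [], 0
    for turn in reversed(turns):          # most recent turn first
        cost = estimate_tokens(turn)
        if used + cost > max_history_tokens:
            break                         # budget reached; older turns are dropped
        kept.append(turn)
        used += cost
    kept.reverse()                        # present chronologically (oldest first)
    return kept

turns = [f"Turn {i}: " + "x" * 300 for i in range(1, 11)]
print(select_turns(turns, max_history_tokens=400))  # only the newest turns survive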

View File

@@ -6,12 +6,12 @@ ensuring that token limits are properly calculated based on the current model
being used, not global constants.
"""
-from typing import Optional, Dict, Any
-from dataclasses import dataclass
import logging
+from dataclasses import dataclass
+from typing import Any, Optional
-from providers import ModelProviderRegistry, ModelCapabilities
from config import DEFAULT_MODEL
+from providers import ModelCapabilities, ModelProviderRegistry
logger = logging.getLogger(__name__)
@@ -19,12 +19,13 @@ logger = logging.getLogger(__name__)
@dataclass
class TokenAllocation:
"""Token allocation strategy for a model."""
total_tokens: int
content_tokens: int
response_tokens: int
file_tokens: int
history_tokens: int
@property
def available_for_prompt(self) -> int:
"""Tokens available for the actual prompt after allocations."""
@@ -34,17 +35,17 @@ class TokenAllocation:
class ModelContext:
"""
Encapsulates model-specific information and token calculations.
This class provides a single source of truth for all model-related
token calculations, ensuring consistency across the system.
"""
def __init__(self, model_name: str):
self.model_name = model_name
self._provider = None
self._capabilities = None
self._token_allocation = None
@property
def provider(self):
"""Get the model provider lazily."""
@@ -53,78 +54,78 @@ class ModelContext:
if not self._provider:
raise ValueError(f"No provider found for model: {self.model_name}")
return self._provider
@property
def capabilities(self) -> ModelCapabilities:
"""Get model capabilities lazily."""
if self._capabilities is None:
self._capabilities = self.provider.get_capabilities(self.model_name)
return self._capabilities
def calculate_token_allocation(self, reserved_for_response: Optional[int] = None) -> TokenAllocation:
"""
Calculate token allocation based on model capacity.
Args:
reserved_for_response: Override response token reservation
Returns:
TokenAllocation with calculated budgets
"""
total_tokens = self.capabilities.max_tokens
# Dynamic allocation based on model capacity
if total_tokens < 300_000:
-# Smaller context models (O3, GPT-4O): Conservative allocation
+# Smaller context models (O3): Conservative allocation
content_ratio = 0.6 # 60% for content
response_ratio = 0.4 # 40% for response
file_ratio = 0.3 # 30% of content for files
history_ratio = 0.5 # 50% of content for history
else:
# Larger context models (Gemini): More generous allocation
content_ratio = 0.8  # 80% for content
response_ratio = 0.2 # 20% for response
file_ratio = 0.4 # 40% of content for files
history_ratio = 0.4 # 40% of content for history
# Calculate allocations
content_tokens = int(total_tokens * content_ratio)
response_tokens = reserved_for_response or int(total_tokens * response_ratio)
# Sub-allocations within content budget
file_tokens = int(content_tokens * file_ratio)
history_tokens = int(content_tokens * history_ratio)
allocation = TokenAllocation(
total_tokens=total_tokens,
content_tokens=content_tokens,
response_tokens=response_tokens,
file_tokens=file_tokens,
-history_tokens=history_tokens
+history_tokens=history_tokens,
)
logger.debug(f"Token allocation for {self.model_name}:")
logger.debug(f" Total: {allocation.total_tokens:,}")
logger.debug(f" Content: {allocation.content_tokens:,} ({content_ratio:.0%})")
logger.debug(f" Response: {allocation.response_tokens:,} ({response_ratio:.0%})")
logger.debug(f" Files: {allocation.file_tokens:,} ({file_ratio:.0%} of content)")
logger.debug(f" History: {allocation.history_tokens:,} ({history_ratio:.0%} of content)")
return allocation
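# --- Example (not part of this commit): the allocation arithmetic above, applied
# to two illustrative context sizes (200,000 and 1,000,000 tokens are made-up
# inputs, not values read from any provider).
def split(total_tokens: int) -> dict:
    if total_tokens < 300_000:
        content_ratio, response_ratio, file_ratio, history_ratio = 0.6, 0.4, 0.3, 0.5
    else:
        content_ratio, response_ratio, file_ratio, history_ratio = 0.8, 0.2, 0.4, 0.4
    content = int(total_tokens * content_ratio)
    return {
        "content": content,
        "response": int(total_tokens * response_ratio),
        "files": int(content * file_ratio),
        "history": int(content * history_ratio),
    }

print(split(200_000))    # {'content': 120000, 'response': 80000, 'files': 36000, 'history': 60000}
print(split(1_000_000))  # {'content': 800000, 'response': 200000, 'files': 320000, 'history': 320000}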
def estimate_tokens(self, text: str) -> int:
"""
Estimate token count for text using model-specific tokenizer.
For now, uses simple estimation. Can be enhanced with model-specific
tokenizers (tiktoken for OpenAI, etc.) in the future.
"""
# TODO: Integrate model-specific tokenizers
# For now, use conservative estimation
return len(text) // 3 # Conservative estimate
@classmethod
-def from_arguments(cls, arguments: Dict[str, Any]) -> "ModelContext":
+def from_arguments(cls, arguments: dict[str, Any]) -> "ModelContext":
"""Create ModelContext from tool arguments."""
model_name = arguments.get("model") or DEFAULT_MODEL
return cls(model_name)
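A possible usage sketch, assuming the class lives in utils.model_context (as imported earlier in this commit) and that a provider is registered for the resolved model; neither assumption is verified by this diff:

from utils.model_context import ModelContext

# No "model" key in the arguments: from_arguments falls back to DEFAULT_MODEL
ctx = ModelContext.from_arguments({})

allocation = ctx.calculate_token_allocation()   # needs a registered provider
print(allocation.file_tokens, allocation.history_tokens)
print(ctx.estimate_tokens("some prompt text"))  # conservative len // 3 estimate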