Files
my-pal-mcp-server/tools/tracer.py
Fahad 86728a1442 WIP
2025-06-15 15:32:41 +04:00

457 lines
17 KiB
Python

"""
Tracer tool - Static call path prediction and control flow analysis
This tool analyzes code to predict and explain full call paths and control flow without executing code.
Given a method name, its owning class/module, and parameter combinations or runtime values, it predicts
the complete chain of method/function calls that would be triggered.
Key Features:
- Static call path prediction with confidence levels
- Polymorphism and dynamic dispatch analysis
- Value-driven flow analysis based on parameter combinations
- Side effects identification (database, network, filesystem)
- Branching analysis for conditional logic
- Hybrid AI-first approach with optional AST preprocessing for enhanced accuracy
"""
import logging
import os
from typing import Any, Literal, Optional
from pydantic import Field
from config import TEMPERATURE_ANALYTICAL
from systemprompts import TRACER_PROMPT
from .base import BaseTool, ToolRequest
logger = logging.getLogger(__name__)
class TracerRequest(ToolRequest):
    """
    Input model accepted by the tracer tool.

    Three fields are declared here, all of them required (each uses ``...``
    as its default):

    - ``prompt``: free-text description of what should be traced.
    - ``files``: code files or directories to analyze.
    - ``trace_mode``: either ``"precision"`` or ``"dependencies"``.
    """

    # What to trace: method/function name plus its class/file context.
    prompt: str = Field(
        ...,
        description=(
            "Description of what to trace including method/function name and class/file context "
            "(e.g., 'Trace BookingManager::finalizeInvoice method' or "
            "'Analyze dependencies for validate_input function in utils module')"
        ),
    )

    # Where to look: the description asks callers for absolute paths.
    files: list[str] = Field(
        ...,
        description="Code files or directories to analyze (must be absolute paths)",
    )

    # Which of the two analysis styles to run.
    trace_mode: Literal["precision", "dependencies"] = Field(
        ...,
        description=(
            "Trace mode: 'precision' (follows actual code execution path from entry point) or "
            "'dependencies' (analyzes bidirectional dependency mapping showing what calls this target "
            "and what it calls)"
        ),
    )
class TracerTool(BaseTool):
"""
Tracer tool implementation.
This tool analyzes code to predict static call paths and control flow without execution.
Uses a hybrid AI-first approach with optional AST preprocessing for enhanced accuracy.
"""
def get_name(self) -> str:
    """Return the identifier of this tool: the literal string ``"tracer"``."""
    tool_name = "tracer"
    return tool_name
def get_description(self) -> str:
    """
    Return the one-paragraph description of the tracer tool.

    The text explains the two trace types ('precision' and 'dependencies'),
    typical use cases, the JSON response format and how to pick a
    thinking_mode.

    Returns:
        str: The description, as a single line of space-separated sentences
    """
    # Each entry is one sentence (or sentence fragment); they are glued
    # together with exactly one space between consecutive entries.
    sentences = [
        "STATIC CODE ANALYSIS - Analyzes code to provide either execution flow traces or dependency mappings without executing code.",
        "Type 'precision': Follows the actual code path from a specified method/function, resolving calls, branching, and side effects.",
        "Type 'dependencies': Analyzes bidirectional dependencies showing what calls the target and what it calls, including imports and inheritance.",
        "Perfect for: understanding complex code flows, impact analysis, debugging assistance, architecture review.",
        "Responds in structured JSON format for easy parsing and visualization.",
        "Choose thinking_mode based on code complexity: 'medium' for standard analysis (default),",
        "'high' for complex systems, 'max' for legacy codebases requiring deep analysis.",
        "Note: If you're not currently using a top-tier model such as Opus 4 or above, these tools can provide enhanced capabilities.",
    ]
    return " ".join(sentences)
def get_input_schema(self) -> dict[str, Any]:
    """
    Build the JSON-schema style description of the tool's input.

    ``prompt``, ``files`` and ``trace_mode`` are always required. ``model``
    is additionally required when ``self.is_effective_auto_mode()`` is true.
    The schema for the ``model`` property itself comes from
    ``self.get_model_field_schema()``.

    Returns:
        dict[str, Any]: Mapping with the keys "type", "properties" and "required"
    """
    properties: dict[str, Any] = {
        "prompt": {
            "type": "string",
            "description": (
                "Description of what to trace including method/function name and class/file context "
                "(e.g., 'Trace BookingManager::finalizeInvoice method' or "
                "'Analyze dependencies for validate_input function in utils module')"
            ),
        },
        "files": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Code files or directories to analyze (must be absolute paths)",
        },
        "trace_mode": {
            "type": "string",
            "enum": ["precision", "dependencies"],
            "description": (
                "Trace mode: 'precision' (follows actual code execution path from entry point) or "
                "'dependencies' (analyzes bidirectional dependency mapping showing what calls this target "
                "and what it calls)"
            ),
        },
        "model": self.get_model_field_schema(),
        "temperature": {
            "type": "number",
            "description": "Temperature (0-1, default 0.2 for analytical precision)",
            "minimum": 0,
            "maximum": 1,
        },
        "thinking_mode": {
            "type": "string",
            "enum": ["minimal", "low", "medium", "high", "max"],
            "description": "Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), high (67%), max (100% of model max)",
        },
        "use_websearch": {
            "type": "boolean",
            "description": "Enable web search for framework documentation and patterns",
            "default": True,
        },
        "continuation_id": {
            "type": "string",
            "description": "Thread continuation ID for multi-turn conversations across tools",
        },
    }

    # The caller must name a model explicitly only in effective auto mode.
    required_fields = ["prompt", "files", "trace_mode"]
    if self.is_effective_auto_mode():
        required_fields.append("model")

    return {
        "type": "object",
        "properties": properties,
        "required": required_fields,
    }
def get_system_prompt(self) -> str:
    """
    Return the system prompt used by the tracer tool.

    Returns:
        str: The ``TRACER_PROMPT`` constant imported from ``systemprompts``
    """
    return TRACER_PROMPT
def get_default_temperature(self) -> float:
    """
    Return the temperature used when the request does not specify one.

    Returns:
        float: The ``TEMPERATURE_ANALYTICAL`` constant imported from ``config``
    """
    return TEMPERATURE_ANALYTICAL
# Line numbers are enabled by default for precise code references
def get_model_category(self):
    """
    Return the model category this tool asks for.

    Tracer requires extended reasoning for complex flow analysis, so the
    ``EXTENDED_REASONING`` member of ``ToolModelCategory`` is returned.
    """
    # Imported at call time, as the original does.
    # NOTE(review): presumably to avoid an import cycle with tools.models - confirm.
    from tools.models import ToolModelCategory

    category = ToolModelCategory.EXTENDED_REASONING
    return category
def get_request_model(self):
    """Return the request model class for this tool, ``TracerRequest`` (the class itself, not an instance)."""
    return TracerRequest
def detect_primary_language(self, file_paths: list[str]) -> str:
    """
    Guess the dominant programming language of a set of paths.

    Only the (lower-cased) file extension of each path is inspected. Paths
    whose extension is not in the table below - including paths with no
    extension at all - are ignored.

    Args:
        file_paths: List of file paths to analyze

    Returns:
        str: The language with the most matching files; "mixed" when two or
        more languages tie for the highest count; "unknown" when no path has
        a recognized extension
    """
    # Extension -> language lookup. Every extension belongs to one language only.
    extension_to_language = {
        ".py": "python",
        ".pyx": "python",
        ".pyi": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".mts": "typescript",
        ".cts": "typescript",
        ".java": "java",
        ".cs": "csharp",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".c": "cpp",
        ".h": "cpp",
        ".hpp": "cpp",
        ".go": "go",
        ".rs": "rust",
        ".swift": "swift",
        ".kt": "kotlin",
        ".kts": "kotlin",
        ".rb": "ruby",
        ".php": "php",
        ".scala": "scala",
    }

    # Tally recognized files per language.
    tally = {}
    for path in file_paths:
        _, suffix = os.path.splitext(path.lower())
        language = extension_to_language.get(suffix)
        if language is not None:
            tally[language] = tally.get(language, 0) + 1

    if not tally:
        return "unknown"

    # A single clear winner is reported by name; any tie at the top is "mixed".
    highest = max(tally.values())
    leaders = [language for language, hits in tally.items() if hits == highest]
    if len(leaders) != 1:
        return "mixed"
    return leaders[0]
async def prepare_prompt(self, request: TracerRequest) -> str:
    """
    Prepare the complete prompt for code analysis.

    The prompt is assembled, in this order, from:
    - the system prompt
    - an "ANALYSIS REQUEST" section (user request, trace mode, detected language)
    - an optional web search instruction
    - the embedded code files
    - a closing instruction naming the trace mode

    Note that ``request`` is modified in place: any content returned by
    ``handle_prompt_file`` is prepended to ``request.prompt``, and
    ``request.files`` is replaced by the file list it returns.

    Args:
        request: The validated tracer request

    Returns:
        str: Complete prompt for the model

    Raises:
        ValueError: If ``check_prompt_size`` reports a problem with the user
            prompt. The message is "MCP_SIZE_CHECK:" followed by the
            JSON-serialized ``ToolOutput`` built from that report.
    """
    # Function-scope import, kept local like the file's other project imports.
    # It is needed on every successful path, so it is imported once here
    # instead of twice further down.
    from utils.token_utils import estimate_tokens

    logger.info(
        f"[TRACER] Preparing prompt for {request.trace_mode} trace analysis with {len(request.files)} files"
    )
    logger.debug(f"[TRACER] User request: {request.prompt[:100]}...")

    # Check for prompt.txt in files
    prompt_content, updated_files = self.handle_prompt_file(request.files)

    # If prompt.txt was found, incorporate it into the request prompt
    if prompt_content:
        logger.debug("[TRACER] Found prompt.txt file, incorporating content")
        request.prompt = prompt_content + "\n\n" + request.prompt

    # Update request files list
    if updated_files is not None:
        logger.debug(f"[TRACER] Updated files list after prompt.txt processing: {len(updated_files)} files")
        request.files = updated_files

    # Check user input size at MCP transport boundary (before adding internal content)
    size_check = self.check_prompt_size(request.prompt)
    if size_check:
        from tools.models import ToolOutput

        raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")

    # Detect primary language
    primary_language = self.detect_primary_language(request.files)
    logger.debug(f"[TRACER] Detected primary language: {primary_language}")

    # Use centralized file processing logic for main code files
    continuation_id = getattr(request, "continuation_id", None)
    logger.debug(f"[TRACER] Preparing {len(request.files)} code files for analysis")
    code_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code to analyze")

    if code_content:
        code_tokens = estimate_tokens(code_content)
        logger.info(f"[TRACER] Code files embedded successfully: {code_tokens:,} tokens")
    else:
        logger.warning("[TRACER] No code content after file processing")
        # Normalize any falsy result (e.g. None) to an empty string so the
        # final "\n".join below cannot fail on a non-string item.
        code_content = ""

    # Build the complete prompt, starting with the system prompt and the
    # user request / analysis parameters
    prompt_parts = [
        self.get_system_prompt(),
        "\n=== ANALYSIS REQUEST ===",
        f"User Request: {request.prompt}",
        f"Trace Mode: {request.trace_mode}",
        f"Language: {primary_language}",
        "=== END REQUEST ===",
    ]

    # Add web search instruction if enabled
    websearch_instruction = self.get_websearch_instruction(
        getattr(request, "use_websearch", True),
        f"""When analyzing code for {primary_language}, consider if searches for these would help:
- Framework-specific call patterns and lifecycle methods
- Language-specific dispatch mechanisms and polymorphism
- Common side-effect patterns for libraries used in the code
- Documentation for external APIs and services called
- Known design patterns that affect call flow""",
    )
    if websearch_instruction:
        prompt_parts.append(websearch_instruction)

    # Add main code to analyze
    prompt_parts.append("\n=== CODE TO ANALYZE ===")
    prompt_parts.append(code_content)
    prompt_parts.append("=== END CODE ===")

    # Add analysis instructions
    prompt_parts.append(f"\nPlease perform a {request.trace_mode} trace analysis based on the user request.")

    full_prompt = "\n".join(prompt_parts)

    # Log final prompt statistics
    total_tokens = estimate_tokens(full_prompt)
    logger.info(f"[TRACER] Complete prompt prepared: {total_tokens:,} tokens, {len(full_prompt):,} characters")

    return full_prompt
def format_response(self, response: str, request: TracerRequest, model_info: Optional[dict] = None) -> str:
    """
    Append mode-specific rendering instructions to the model's analysis.

    The raw response is returned unchanged, followed by a footer that names
    the model, states which trace mode was run, embeds the rendering
    instructions for that mode and lists general formatting requirements.
    No JSON parsing is done here.

    Args:
        response: The raw analysis from the model
        request: The original request; its ``trace_mode`` selects the instructions
        model_info: Optional dict with model metadata. When it has a truthy
            "model_response" entry, that object's ``friendly_name`` is used
            as the model name in the footer

    Returns:
        str: ``response`` followed by the rendering-instruction footer
    """
    logger.debug(f"[TRACER] Formatting response for {request.trace_mode} trace analysis")

    # Resolve the friendly model name, falling back to a generic label when
    # no metadata (or no name) is available.
    model_response = model_info.get("model_response") if model_info else None
    if model_response:
        model_name = model_response.friendly_name or "the model"
    else:
        model_name = "the model"

    # Mode-specific rendering instructions
    rendering_instructions = self._get_rendering_instructions(request.trace_mode)

    footer = f"""
---
**Analysis Complete**: {model_name} has completed a {request.trace_mode} analysis as requested.
{rendering_instructions}
**GENERAL REQUIREMENTS:**
- Follow the rendering instructions EXACTLY as specified above
- Use only the data provided in the JSON response
- Maintain exact formatting for readability
- Include file paths and line numbers as provided
- Do not add explanations or commentary outside the specified format"""

    return f"{response}{footer}"
def _get_rendering_instructions(self, trace_mode: str) -> str:
    """
    Select the rendering instructions that match a trace mode.

    Args:
        trace_mode: Either "precision" or "dependencies"

    Returns:
        str: The precision instructions when ``trace_mode`` is "precision";
        the dependencies instructions for every other value
    """
    if trace_mode != "precision":
        # Anything that is not "precision" is handled as dependencies mode.
        return self._get_dependencies_rendering_instructions()
    return self._get_precision_rendering_instructions()
def _get_precision_rendering_instructions(self) -> str:
    """
    Return the fixed rendering instructions for precision trace mode.

    The text is a constant Markdown template describing two views: a
    top-down call flow diagram and a branching / side-effect table.

    Returns:
        str: The instruction text (starts with a newline, no trailing newline)
    """
    precision_template = """
## MANDATORY RENDERING INSTRUCTIONS FOR PRECISION TRACE
You MUST render the trace analysis in exactly two views:
### 1. CALL FLOW DIAGRAM (TOP-DOWN)
Use this exact format:
```
[Class::Method] (file: /path, line: ##)
[Class::CalledMethod] (file: /path, line: ##)
...
```
**Rules:**
- Chain each call using ↓ or → for readability
- Include file name and line number per method
- If the call is conditional, append `? if condition`
- If ambiguous, mark with `⚠️ ambiguous branch`
- Indent nested calls appropriately
### 2. BRANCHING & SIDE EFFECT TABLE
Render exactly this table format:
| Location | Condition | Branches | Ambiguous |
|----------|-----------|----------|-----------|
| /file/path:## | if condition | method1(), method2() | ✅/❌ |
**Side Effects section:**
```
Side Effects:
- [database] description (File.ext:##)
- [network] description (File.ext:##)
- [filesystem] description (File.ext:##)
```
**CRITICAL RULES:**
- ALWAYS render both views unless data is missing
- Use exact filenames, class names, and line numbers from JSON
- DO NOT invent function names or examples
- Mark ambiguous branches with ⚠️ or ✅
- If sections are empty, omit them cleanly"""
    return precision_template
def _get_dependencies_rendering_instructions(self) -> str:
    """
    Return the fixed rendering instructions for dependencies trace mode.

    The text is a constant Markdown template describing two views: a
    dependency flow graph (incoming, outgoing, type dependencies, state
    access) and a dependency table.

    Returns:
        str: The instruction text (starts with a newline, no trailing newline)
    """
    dependencies_template = """
## MANDATORY RENDERING INSTRUCTIONS FOR DEPENDENCIES TRACE
You MUST render the trace analysis in exactly two views:
### 1. DEPENDENCY FLOW GRAPH
Use this exact format:
**Incoming:**
```
Called by:
- [CallerClass::callerMethod] ← /path/file.ext:##
- [ServiceImpl::run] ← /path/file.ext:##
```
**Outgoing:**
```
Calls:
- [Logger::logAction] → /utils/log.ext:##
- [PaymentClient::send] → /clients/pay.ext:##
```
**Type Dependencies:**
```
- conforms_to: ProtocolName
- implements: InterfaceName
- imports: ModuleName, LibraryName
```
**State Access:**
```
- reads: property.name (line ##)
- writes: object.field (line ##)
```
**Arrow Rules:**
- `←` for incoming (who calls this)
- `→` for outgoing (what this calls)
### 2. DEPENDENCY TABLE
Render exactly this table format:
| Type | From/To | Method | File | Line |
|------|---------|--------|------|------|
| direct_call | From: CallerClass | callerMethod | /path/file.ext | ## |
| method_call | To: TargetClass | targetMethod | /path/file.ext | ## |
| uses_property | To: ObjectClass | .propertyName | /path/file.ext | ## |
| conforms_to | Self: ThisClass | — | /path/file.ext | — |
**CRITICAL RULES:**
- ALWAYS render both views unless data is missing
- Use exact filenames, class names, and line numbers from JSON
- DO NOT invent function names or examples
- If sections (state access, type dependencies) are empty, omit them cleanly
- Show directional dependencies with proper arrows"""
    return dependencies_template