""" TracePath tool - Static call path prediction and control flow analysis This tool analyzes code to predict and explain full call paths and control flow without executing code. Given a method name, its owning class/module, and parameter combinations or runtime values, it predicts the complete chain of method/function calls that would be triggered. Key Features: - Static call path prediction with confidence levels - Polymorphism and dynamic dispatch analysis - Value-driven flow analysis based on parameter combinations - Side effects identification (database, network, filesystem) - Branching analysis for conditional logic - Hybrid AI-first approach with optional AST preprocessing for enhanced accuracy """ import logging import os import re from typing import Any, Literal, Optional from pydantic import Field from config import TEMPERATURE_ANALYTICAL from systemprompts import TRACEPATH_PROMPT from .base import BaseTool, ToolRequest logger = logging.getLogger(__name__) class TracePathRequest(ToolRequest): """ Request model for the tracepath tool. This model defines all parameters for customizing the call path analysis process. """ entry_point: str = Field( ..., description="Method/function to trace (e.g., 'BookingManager::finalizeInvoice', 'utils.validate_input')", ) files: list[str] = Field( ..., description="Code files or directories to analyze (must be absolute paths)", ) parameters: Optional[dict[str, Any]] = Field( None, description="Parameter values to analyze - format: {param_name: value_or_type}", ) context: Optional[str] = Field( None, description="Additional context about analysis goals or specific scenarios to focus on", ) analysis_depth: Literal["shallow", "medium", "deep"] = Field( "medium", description="Analysis depth: shallow (direct calls), medium (2-3 levels), deep (full trace)", ) language: Optional[str] = Field( None, description="Override auto-detection: python, javascript, typescript, csharp, java", ) signature: Optional[str] = Field( None, description="Fully-qualified signature for overload resolution in languages like C#/Java", ) confidence_threshold: Optional[float] = Field( 0.7, description="Filter speculative branches (0-1, default 0.7)", ge=0.0, le=1.0, ) include_db: bool = Field( True, description="Include database interactions in side effects analysis", ) include_network: bool = Field( True, description="Include network calls in side effects analysis", ) include_fs: bool = Field( True, description="Include filesystem operations in side effects analysis", ) export_format: Literal["markdown", "json", "plantuml"] = Field( "markdown", description="Output format for the analysis results", ) focus_areas: Optional[list[str]] = Field( None, description="Specific aspects to focus on (e.g., 'performance', 'security', 'error_handling')", ) class TracePathTool(BaseTool): """ TracePath tool implementation. This tool analyzes code to predict static call paths and control flow without execution. Uses a hybrid AI-first approach with optional AST preprocessing for enhanced accuracy. """ def get_name(self) -> str: return "tracepath" def get_description(self) -> str: return ( "STATIC CALL PATH ANALYSIS - Predicts and explains full call paths and control flow without executing code. " "Given a method/function name and parameter values, traces the complete execution path including " "conditional branches, polymorphism resolution, and side effects. " "Perfect for: understanding complex code flows, impact analysis, debugging assistance, architecture review. 
" "Provides confidence levels for predictions and identifies uncertain calls due to dynamic behavior. " "Choose thinking_mode based on code complexity: 'low' for simple functions, " "'medium' for standard analysis (default), 'high' for complex systems, " "'max' for legacy codebases requiring deep analysis. " "Note: If you're not currently using a top-tier model such as Opus 4 or above, these tools can provide enhanced capabilities." ) def get_input_schema(self) -> dict[str, Any]: schema = { "type": "object", "properties": { "entry_point": { "type": "string", "description": "Method/function to trace (e.g., 'BookingManager::finalizeInvoice', 'utils.validate_input')", }, "files": { "type": "array", "items": {"type": "string"}, "description": "Code files or directories to analyze (must be absolute paths)", }, "model": self.get_model_field_schema(), "parameters": { "type": "object", "description": "Parameter values to analyze - format: {param_name: value_or_type}", }, "context": { "type": "string", "description": "Additional context about analysis goals or specific scenarios to focus on", }, "analysis_depth": { "type": "string", "enum": ["shallow", "medium", "deep"], "default": "medium", "description": "Analysis depth: shallow (direct calls), medium (2-3 levels), deep (full trace)", }, "language": { "type": "string", "enum": ["python", "javascript", "typescript", "csharp", "java"], "description": "Override auto-detection for programming language", }, "signature": { "type": "string", "description": "Fully-qualified signature for overload resolution", }, "confidence_threshold": { "type": "number", "minimum": 0.0, "maximum": 1.0, "default": 0.7, "description": "Filter speculative branches (0-1)", }, "include_db": { "type": "boolean", "default": True, "description": "Include database interactions in analysis", }, "include_network": { "type": "boolean", "default": True, "description": "Include network calls in analysis", }, "include_fs": { "type": "boolean", "default": True, "description": "Include filesystem operations in analysis", }, "export_format": { "type": "string", "enum": ["markdown", "json", "plantuml"], "default": "markdown", "description": "Output format for analysis results", }, "focus_areas": { "type": "array", "items": {"type": "string"}, "description": "Specific aspects to focus on", }, "temperature": { "type": "number", "description": "Temperature (0-1, default 0.2 for analytical precision)", "minimum": 0, "maximum": 1, }, "thinking_mode": { "type": "string", "enum": ["minimal", "low", "medium", "high", "max"], "description": "Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), high (67%), max (100% of model max)", }, "use_websearch": { "type": "boolean", "description": "Enable web search for framework documentation and patterns", "default": True, }, "continuation_id": { "type": "string", "description": "Thread continuation ID for multi-turn conversations across tools", }, }, "required": ["entry_point", "files"] + (["model"] if self.is_effective_auto_mode() else []), } return schema def get_system_prompt(self) -> str: return TRACEPATH_PROMPT def get_default_temperature(self) -> float: return TEMPERATURE_ANALYTICAL # Line numbers are enabled by default for precise code references def get_model_category(self): """TracePath requires extended reasoning for complex flow analysis""" from tools.models import ToolModelCategory return ToolModelCategory.EXTENDED_REASONING def get_request_model(self): return TracePathRequest def detect_primary_language(self, file_paths: 
list[str]) -> str: """ Detect the primary programming language from file extensions. Args: file_paths: List of file paths to analyze Returns: str: Detected language or "mixed" if multiple languages found """ # Language detection based on file extensions language_extensions = { "python": {".py", ".pyx", ".pyi"}, "javascript": {".js", ".jsx", ".mjs", ".cjs"}, "typescript": {".ts", ".tsx", ".mts", ".cts"}, "java": {".java"}, "csharp": {".cs"}, "cpp": {".cpp", ".cc", ".cxx", ".c", ".h", ".hpp"}, "go": {".go"}, "rust": {".rs"}, "swift": {".swift"}, "kotlin": {".kt", ".kts"}, "ruby": {".rb"}, "php": {".php"}, "scala": {".scala"}, } # Count files by language language_counts = {} for file_path in file_paths: extension = os.path.splitext(file_path.lower())[1] for lang, exts in language_extensions.items(): if extension in exts: language_counts[lang] = language_counts.get(lang, 0) + 1 break if not language_counts: return "unknown" # Return most common language, or "mixed" if multiple languages max_count = max(language_counts.values()) dominant_languages = [lang for lang, count in language_counts.items() if count == max_count] if len(dominant_languages) == 1: return dominant_languages[0] else: return "mixed" def parse_entry_point(self, entry_point: str, language: str) -> dict[str, str]: """ Parse entry point string to extract class/module and method/function information. Args: entry_point: Entry point string (e.g., "BookingManager::finalizeInvoice", "utils.validate_input") language: Detected or specified programming language Returns: dict: Parsed entry point information """ result = { "raw": entry_point, "class_or_module": "", "method_or_function": "", "type": "unknown", } # Common patterns across languages patterns = { # Class::method (C++, PHP style) "class_method_double_colon": r"^([A-Za-z_][A-Za-z0-9_]*?)::([A-Za-z_][A-Za-z0-9_]*?)$", # Module.function or Class.method (Python, JavaScript, etc.) "module_function_dot": r"^([A-Za-z_][A-Za-z0-9_]*?)\.([A-Za-z_][A-Za-z0-9_]*?)$", # Nested module.submodule.function "nested_module_dot": r"^([A-Za-z_][A-Za-z0-9_.]*?)\.([A-Za-z_][A-Za-z0-9_]*?)$", # Just function name "function_only": r"^([A-Za-z_][A-Za-z0-9_]*?)$", } # Try patterns in order of specificity for pattern_name, pattern in patterns.items(): match = re.match(pattern, entry_point.strip()) if match: if pattern_name == "function_only": result["method_or_function"] = match.group(1) result["type"] = "function" else: result["class_or_module"] = match.group(1) result["method_or_function"] = match.group(2) # Determine if it's a class method or module function based on naming conventions if pattern_name == "class_method_double_colon": result["type"] = "method" elif result["class_or_module"][0].isupper(): result["type"] = "method" # Likely class method (CamelCase) else: result["type"] = "function" # Likely module function (snake_case) break logger.debug(f"[TRACEPATH] Parsed entry point '{entry_point}' as: {result}") return result async def _generate_structural_summary(self, files: list[str], language: str) -> str: """ Generate structural summary of the code using AST parsing. 
Phase 1: Returns empty string (pure AI-driven approach) Phase 2: Will contain language-specific AST parsing logic Args: files: List of file paths to analyze language: Detected programming language Returns: str: Structural summary or empty string for Phase 1 """ # Phase 1 implementation: Pure AI-driven approach # Phase 2 will add AST parsing for enhanced context if language == "python": # Placeholder for Python AST parsing using built-in 'ast' module # Will extract class definitions, method signatures, and direct calls pass elif language in ["javascript", "typescript"]: # Placeholder for JavaScript/TypeScript parsing using acorn or TS compiler API pass elif language == "csharp": # Placeholder for C# parsing using Microsoft Roslyn SDK pass elif language == "java": # Placeholder for Java parsing (future implementation) pass # For Phase 1, return empty to rely on pure LLM analysis logger.debug(f"[TRACEPATH] Phase 1: No structural summary generated for {language}") return "" async def prepare_prompt(self, request: TracePathRequest) -> str: """ Prepare the complete prompt for call path analysis. This method combines: - System prompt with analysis instructions - User context and entry point information - File contents with line numbers - Structural summary (Phase 2) - Analysis parameters and constraints Args: request: The validated tracepath request Returns: str: Complete prompt for the model Raises: ValueError: If the prompt exceeds token limits """ logger.info( f"[TRACEPATH] Preparing prompt for entry point '{request.entry_point}' with {len(request.files)} files" ) logger.debug(f"[TRACEPATH] Analysis depth: {request.analysis_depth}, Export format: {request.export_format}") # Check for prompt.txt in files prompt_content, updated_files = self.handle_prompt_file(request.files) # If prompt.txt was found, incorporate it into the context if prompt_content: logger.debug("[TRACEPATH] Found prompt.txt file, incorporating content") if request.context: request.context = prompt_content + "\n\n" + request.context else: request.context = prompt_content # Update request files list if updated_files is not None: logger.debug(f"[TRACEPATH] Updated files list after prompt.txt processing: {len(updated_files)} files") request.files = updated_files # Check user input size at MCP transport boundary (before adding internal content) if request.context: size_check = self.check_prompt_size(request.context) if size_check: from tools.models import ToolOutput raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}") # Detect or use specified language if request.language: primary_language = request.language logger.debug(f"[TRACEPATH] Using specified language: {primary_language}") else: primary_language = self.detect_primary_language(request.files) logger.debug(f"[TRACEPATH] Detected primary language: {primary_language}") # Parse entry point entry_point_info = self.parse_entry_point(request.entry_point, primary_language) logger.debug(f"[TRACEPATH] Entry point parsed as: {entry_point_info}") # Generate structural summary (Phase 1: returns empty, Phase 2: AST analysis) continuation_id = getattr(request, "continuation_id", None) structural_summary = await self._generate_structural_summary(request.files, primary_language) # Use centralized file processing logic for main code files (with line numbers enabled) logger.debug(f"[TRACEPATH] Preparing {len(request.files)} code files for analysis") code_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code to analyze") if 
code_content: from utils.token_utils import estimate_tokens code_tokens = estimate_tokens(code_content) logger.info(f"[TRACEPATH] Code files embedded successfully: {code_tokens:,} tokens") else: logger.warning("[TRACEPATH] No code content after file processing") # Build the complete prompt prompt_parts = [] # Add system prompt prompt_parts.append(self.get_system_prompt()) # Add structural summary if available (Phase 2) if structural_summary: prompt_parts.append("\n=== STRUCTURAL SUMMARY ===") prompt_parts.append(structural_summary) prompt_parts.append("=== END STRUCTURAL SUMMARY ===") # Add user context and analysis parameters prompt_parts.append("\n=== ANALYSIS REQUEST ===") prompt_parts.append(f"Entry Point: {request.entry_point}") if entry_point_info["type"] != "unknown": prompt_parts.append( f"Parsed as: {entry_point_info['type']} '{entry_point_info['method_or_function']}' in {entry_point_info['class_or_module'] or 'global scope'}" ) prompt_parts.append(f"Language: {primary_language}") prompt_parts.append(f"Analysis Depth: {request.analysis_depth}") prompt_parts.append(f"Confidence Threshold: {request.confidence_threshold}") if request.signature: prompt_parts.append(f"Method Signature: {request.signature}") if request.parameters: prompt_parts.append(f"Parameter Values: {request.parameters}") # Side effects configuration side_effects = [] if request.include_db: side_effects.append("database") if request.include_network: side_effects.append("network") if request.include_fs: side_effects.append("filesystem") if side_effects: prompt_parts.append(f"Include Side Effects: {', '.join(side_effects)}") if request.focus_areas: prompt_parts.append(f"Focus Areas: {', '.join(request.focus_areas)}") if request.context: prompt_parts.append(f"Additional Context: {request.context}") prompt_parts.append(f"Export Format: {request.export_format}") prompt_parts.append("=== END REQUEST ===") # Add web search instruction if enabled websearch_instruction = self.get_websearch_instruction( request.use_websearch, f"""When analyzing call paths for {primary_language} code, consider if searches for these would help: - Framework-specific call patterns and lifecycle methods - Language-specific dispatch mechanisms and polymorphism - Common side-effect patterns for libraries used in the code - Documentation for external APIs and services called - Known design patterns that affect call flow""", ) if websearch_instruction: prompt_parts.append(websearch_instruction) # Add main code to analyze prompt_parts.append("\n=== CODE TO ANALYZE ===") prompt_parts.append(code_content) prompt_parts.append("=== END CODE ===") # Add analysis instructions analysis_instructions = [ f"\nPlease perform a {request.analysis_depth} static call path analysis for the entry point '{request.entry_point}'." ] if request.parameters: analysis_instructions.append( "Pay special attention to how the provided parameter values affect the execution flow." ) if request.confidence_threshold < 1.0: analysis_instructions.append( f"Filter out speculative paths with confidence below {request.confidence_threshold}." 
) analysis_instructions.append(f"Format the output as {request.export_format}.") prompt_parts.extend(analysis_instructions) full_prompt = "\n".join(prompt_parts) # Log final prompt statistics from utils.token_utils import estimate_tokens total_tokens = estimate_tokens(full_prompt) logger.info(f"[TRACEPATH] Complete prompt prepared: {total_tokens:,} tokens, {len(full_prompt):,} characters") return full_prompt def format_response(self, response: str, request: TracePathRequest, model_info: Optional[dict] = None) -> str: """ Format the call path analysis response. The base tool handles structured response validation via SPECIAL_STATUS_MODELS, so this method focuses on providing clear guidance for next steps. Args: response: The raw analysis from the model request: The original request for context model_info: Optional dict with model metadata Returns: str: The response with additional guidance """ logger.debug(f"[TRACEPATH] Formatting response for entry point '{request.entry_point}'") # Get the friendly model name model_name = "the model" if model_info and model_info.get("model_response"): model_name = model_info["model_response"].friendly_name or "the model" # Add contextual footer based on analysis depth and format if request.export_format == "json": footer = f""" --- **Analysis Complete**: {model_name} has provided a structured JSON analysis of the call path for `{request.entry_point}`. **Next Steps**: - Review the confidence levels for each predicted call - Investigate any uncertain calls marked with low confidence - Use this analysis for impact assessment, debugging, or architecture review - For deeper analysis, increase analysis_depth to 'deep' or provide additional context files """ elif request.export_format == "plantuml": footer = f""" --- **Analysis Complete**: {model_name} has generated a PlantUML diagram showing the call path for `{request.entry_point}`. **Next Steps**: - Render the PlantUML diagram to visualize the call flow - Review branching points and conditional logic - Verify the predicted paths against your understanding of the code - Use this for documentation or architectural discussions """ else: # markdown footer = f""" --- **Analysis Complete**: {model_name} has traced the execution path for `{request.entry_point}` at {request.analysis_depth} depth. **Next Steps**: - Review the call path summary and confidence assessments - Pay attention to uncertain calls that may require runtime verification - Use the code anchors (file:line references) to navigate to critical decision points - Consider this analysis for debugging, impact assessment, or refactoring decisions """ return f"{response}{footer}"
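

# Hedged usage sketch (not part of the MCP server flow): a minimal example of building
# and inspecting a TracePathRequest, useful when experimenting with this tool locally.
# Assumptions: ToolRequest's inherited fields are all optional, and the module is run as
# part of its package (e.g. `python -m tools.tracepath`, package name assumed) so that
# the relative import of BaseTool/ToolRequest resolves. The entry point, file path, and
# parameter values below are hypothetical placeholders.
if __name__ == "__main__":
    sample_request = TracePathRequest(
        entry_point="BookingManager::finalizeInvoice",
        files=["/abs/path/to/booking_manager.py"],  # must be an absolute path
        parameters={"invoice_id": 42},
        analysis_depth="deep",
        export_format="plantuml",
    )
    # Pydantic validates depth/format literals and the confidence_threshold bounds here.
    print(sample_request.model_dump_json(indent=2))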