Vastly improved debug tool and related instructions

Accompanying simulation test
Cleanup - A single source of truth for parameter descriptions
Fahad
2025-06-17 16:23:26 +04:00
parent 9bf2a2a51c
commit 044a8621a3
12 changed files with 829 additions and 238 deletions

View File

@@ -33,6 +33,9 @@ Available tests:
openrouter_fallback - OpenRouter fallback mechanism testing
openrouter_models - OpenRouter models availability testing
token_allocation_validation - Token allocation and limits validation
testgen_validation - TestGen tool validation with specific test function
refactor_validation - Refactor tool validation with codesmells
debug_validation - Debug tool validation with actual bugs
conversation_chain_validation - Conversation chain continuity validation
Examples:

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "4.9.2"
__version__ = "4.9.3"
# Last update date in ISO format
__updated__ = "2025-06-17"
# Primary maintainer

View File

@@ -2,7 +2,8 @@
**Root cause analysis for complex problems**
The `debug` tool provides systematic debugging assistance with root cause analysis, hypothesis generation, and structured problem-solving approaches for complex technical issues.
The `debug` tool provides systematic debugging assistance with root cause analysis, hypothesis generation, and
structured problem-solving approaches for complex technical issues.
## Thinking Mode
@@ -12,10 +13,16 @@ The `debug` tool provides systematic debugging assistance with root cause analys
**Basic Usage:**
```
"Use gemini to debug this TypeError: 'NoneType' object has no attribute 'split'"
"Get gemini to debug why my API returns 500 errors with the full stack trace: [paste traceback]"
"Get gemini to debug why my API returns 400 errors randomly with the full stack trace: [paste traceback]"
```
## How It Works
Just because Claude gets to use a development partner doesn't mean it's off the hook!
Claude does the initial groundwork of investigation and then passes this on to the other model - just as a developer
would when asking a colleague for a second opinion, providing enough context up front. This results in a significant
improvement in bug hunting and reduces the chance of wasting precious tokens going back and forth.
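In practice, the hand-off might look like this (an illustrative prompt; the notes file name is hypothetical):
```
"I've already investigated the intermittent 401 errors and noted my findings in DEBUGGING_session_errors.md - get gemini to debug this with that file plus session_manager.py for a second opinion on the root cause"
```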
## Key Features
- **Generates multiple ranked hypotheses** for systematic debugging

View File

@@ -14,6 +14,7 @@ from .test_content_validation import ContentValidationTest
from .test_conversation_chain_validation import ConversationChainValidationTest
from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
from .test_cross_tool_continuation import CrossToolContinuationTest
from .test_debug_validation import DebugValidationTest
from .test_line_number_validation import LineNumberValidationTest
from .test_logs_validation import LogsValidationTest
from .test_model_thinking_config import TestModelThinkingConfig
@@ -48,6 +49,7 @@ TEST_REGISTRY = {
"token_allocation_validation": TokenAllocationValidationTest,
"testgen_validation": TestGenValidationTest,
"refactor_validation": RefactorValidationTest,
"debug_validation": DebugValidationTest,
"conversation_chain_validation": ConversationChainValidationTest,
"vision_capability": VisionCapabilityTest,
"xai_models": XAIModelsTest,
@@ -76,6 +78,7 @@ __all__ = [
"TokenAllocationValidationTest",
"TestGenValidationTest",
"RefactorValidationTest",
"DebugValidationTest",
"ConversationChainValidationTest",
"VisionCapabilityTest",
"XAIModelsTest",

View File

@@ -0,0 +1,436 @@
#!/usr/bin/env python3
"""
Debug Tool Validation Test
Tests the debug tool with real bugs to validate:
- Proper execution with flash model
- Actual bug identification and analysis
- Hypothesis generation for root causes
- Log validation for tool execution
"""
import json
from .base_test import BaseSimulatorTest
class DebugValidationTest(BaseSimulatorTest):
"""Test debug tool with actual bug scenarios"""
@property
def test_name(self) -> str:
return "debug_validation"
@property
def test_description(self) -> str:
return "Debug tool validation with actual bugs"
def run_test(self) -> bool:
"""Test debug tool with real bugs"""
try:
self.logger.info("Test: Debug tool validation")
# Setup test files directory first
self.setup_test_files()
# Create a Python file with a subtle but realistic bug
buggy_code = """#!/usr/bin/env python3
import json
import requests
from datetime import datetime, timedelta
class UserSessionManager:
def __init__(self):
self.active_sessions = {}
self.session_timeout = 30 * 60 # 30 minutes in seconds
def create_session(self, user_id, user_data):
\"\"\"Create a new user session\"\"\"
session_id = f"sess_{user_id}_{datetime.now().timestamp()}"
session_info = {
'user_id': user_id,
'user_data': user_data,
'created_at': datetime.now(),
'last_activity': datetime.now(),
'expires_at': datetime.now() + timedelta(seconds=self.session_timeout)
}
self.active_sessions[session_id] = session_info
return session_id
def validate_session(self, session_id):
\"\"\"Check if session is valid and not expired\"\"\"
if session_id not in self.active_sessions:
return False
session = self.active_sessions[session_id]
current_time = datetime.now()
# Check if session has expired
if current_time > session['expires_at']:
del self.active_sessions[session_id]
return False
# Update last activity
session['last_activity'] = current_time
return True
def cleanup_expired_sessions(self):
\"\"\"Remove expired sessions from memory\"\"\"
current_time = datetime.now()
expired_sessions = []
for session_id, session in self.active_sessions.items():
if current_time > session['expires_at']:
expired_sessions.append(session_id)
for session_id in expired_sessions:
del self.active_sessions[session_id]
return len(expired_sessions)
class APIHandler:
def __init__(self):
self.session_manager = UserSessionManager()
self.request_count = 0
def authenticate_user(self, username, password):
\"\"\"Authenticate user and create session\"\"\"
# Simulate API call to auth service
auth_response = self._call_auth_service(username, password)
if auth_response.get('success'):
user_data = auth_response.get('user_data', {})
session_id = self.session_manager.create_session(
user_data['id'], user_data
)
return {'success': True, 'session_id': session_id}
return {'success': False, 'error': 'Authentication failed'}
def process_request(self, session_id, request_data):
\"\"\"Process an API request with session validation\"\"\"
self.request_count += 1
# Validate session before processing
if not self.session_manager.validate_session(session_id):
return {'error': 'Invalid or expired session', 'code': 401}
# Simulate request processing
try:
result = self._process_business_logic(request_data)
return {'success': True, 'data': result}
except Exception as e:
return {'error': str(e), 'code': 500}
def _call_auth_service(self, username, password):
\"\"\"Simulate external authentication service call\"\"\"
# Simulate network delay and response
import time
time.sleep(0.1)
# Mock successful authentication
if username and password:
return {
'success': True,
'user_data': {
'id': hash(username) % 10000,
'username': username,
'roles': ['user']
}
}
return {'success': False}
def _process_business_logic(self, request_data):
\"\"\"Simulate business logic processing\"\"\"
if not request_data:
raise ValueError("Invalid request data")
# Simulate some processing
return {
'processed_at': datetime.now().isoformat(),
'request_id': self.request_count,
'status': 'completed'
}
# Global API handler instance
api_handler = APIHandler()
def handle_api_request(session_id, request_data):
\"\"\"Main API request handler\"\"\"
return api_handler.process_request(session_id, request_data)
"""
# Create test file with subtle bug
test_file = self.create_additional_test_file("session_manager.py", buggy_code)
self.logger.info(f" ✅ Created test file with subtle bug: {test_file}")
# Create a realistic problem description with subtle symptoms
error_description = """ISSUE DESCRIPTION:
Our API service is experiencing intermittent session validation failures in production.
SYMPTOMS OBSERVED:
- Users randomly get "Invalid or expired session" errors even with valid sessions
- The issue happens more frequently during high-traffic periods
- Sessions that should still be valid (created < 30 minutes ago) are being rejected
- The problem occurs in maybe 2-3% of requests but is hard to reproduce consistently
- Server logs show session validation failing but no clear pattern
ENVIRONMENT:
- Python 3.13 API service
- Running in production with multiple concurrent users
- Redis not used for session storage (in-memory only)
- Load balancer distributes requests across multiple instances
RECENT CHANGES:
- Increased session timeout from 15 to 30 minutes last week
- Added cleanup routine to remove expired sessions
- No major code changes to session management
USER IMPACT:
- Users have to re-authenticate randomly
- Affects user experience and causes complaints
- Seems to happen more on busy days
The code looks correct to me, but something is causing valid sessions to be treated as expired or invalid. I'm not sure what's causing this intermittent behavior."""
error_file = self.create_additional_test_file("error_description.txt", error_description)
self.logger.info(f" ✅ Created error description file: {error_file}")
# Call debug tool with flash model and realistic problem description
self.logger.info(" 🔍 Calling debug tool to investigate session validation issues...")
response, continuation_id = self.call_mcp_tool(
"debug",
{
"prompt": "Investigate why our API is experiencing intermittent session validation failures in production",
"files": [test_file, error_file],
"findings": "Users getting 'Invalid or expired session' errors randomly, occurs more during high traffic, sessions should still be valid",
"error_context": "Sessions created < 30 minutes ago being rejected, happens ~2-3% of requests, load balanced environment",
"systematic_investigation": True,
"model": "flash",
"thinking_mode": "medium",
},
)
if not response:
self.logger.error("Failed to get debug response")
return False
self.logger.info(" ✅ Got debug response")
# Parse response to validate bug identification
try:
response_data = json.loads(response)
self.logger.debug(f"Response keys: {list(response_data.keys())}")
# Extract the actual content if it's wrapped
if "content" in response_data:
content = response_data["content"]
# Handle markdown JSON blocks
if content.startswith("```json"):
content = content[7:]
if content.endswith("```"):
content = content[:-3]
content = content.strip()
# Parse the inner JSON
inner_data = json.loads(content)
self.logger.debug(f"Inner data keys: {list(inner_data.keys())}")
else:
inner_data = response_data
# Check for structured debug analysis (should have analysis_complete status)
if inner_data.get("status") == "analysis_complete":
self.logger.info(" ✅ Got structured debug analysis")
# Validate hypothesis generation
hypotheses = inner_data.get("hypotheses", [])
if not hypotheses:
self.logger.error("No hypotheses found in debug analysis")
return False
self.logger.info(f" 🧠 Found {len(hypotheses)} hypotheses")
# Check if the model identified the real bug: dictionary modification during iteration
analysis_text = json.dumps(inner_data).lower()
# Look for the actual bug - modifying dictionary while iterating
bug_indicators = [
"dictionary",
"iteration",
"modify",
"concurrent",
"runtime error",
"dictionary changed size during iteration",
"cleanup_expired_sessions",
"active_sessions",
"del",
"removing while iterating",
]
found_indicators = [indicator for indicator in bug_indicators if indicator in analysis_text]
# Check for specific mentions of the problematic pattern
dictionary_bug_patterns = [
"modifying dictionary while iterating",
"dictionary changed size",
"concurrent modification",
"iterating over dictionary",
"del.*active_sessions",
"cleanup.*iteration",
]
import re
pattern_matches = []
for pattern in dictionary_bug_patterns:
if re.search(pattern, analysis_text):
pattern_matches.append(pattern)
if len(found_indicators) >= 3 or len(pattern_matches) >= 1:
self.logger.info(" ✅ Flash identified the dictionary iteration bug")
self.logger.info(f" Found indicators: {found_indicators[:3]}")
if pattern_matches:
self.logger.info(f" Pattern matches: {pattern_matches}")
else:
self.logger.error(" ❌ Flash missed the dictionary iteration bug")
self.logger.error(f" Found only: {found_indicators}")
return False
# Validate hypothesis quality (should have confidence levels and reasoning)
valid_hypotheses = 0
for i, hypothesis in enumerate(hypotheses[:3]): # Check top 3
confidence = hypothesis.get("confidence", "").lower()
reasoning = hypothesis.get("reasoning", "")
if confidence in ["high", "medium", "low"] and len(reasoning) > 20:
valid_hypotheses += 1
self.logger.debug(f" Hypothesis {i+1}: {confidence} confidence, good reasoning")
else:
self.logger.debug(f" Hypothesis {i+1}: weak ({confidence}, {len(reasoning)} chars)")
if valid_hypotheses >= 2:
self.logger.info(f" ✅ Found {valid_hypotheses} well-structured hypotheses")
else:
self.logger.error(f" ❌ Only {valid_hypotheses} well-structured hypotheses")
return False
# Check for line-specific references
if "line" in analysis_text or "lines" in analysis_text:
self.logger.info(" 📍 Analysis includes line-specific references")
else:
self.logger.warning(" ⚠️ No line-specific references found")
else:
# Non-structured response - check for dictionary iteration bug identification
self.logger.info(" 📝 Got general debug response")
response_text = response.lower()
# Check for the specific bug in general response
bug_indicators = [
"dictionary",
"iteration",
"modify",
"concurrent",
"active_sessions",
"cleanup",
"del ",
"removing",
"changed size",
]
found_indicators = [indicator for indicator in bug_indicators if indicator in response_text]
if len(found_indicators) >= 3:
self.logger.info(f" ✅ Found {len(found_indicators)} relevant indicators in response")
self.logger.info(f" Found: {found_indicators}")
else:
self.logger.error(f" ❌ Only found {len(found_indicators)} relevant indicators")
self.logger.error(f" Found: {found_indicators}")
return False
except json.JSONDecodeError as e:
self.logger.error(f"Failed to parse debug response as JSON: {e}")
# For non-JSON responses, check for dictionary iteration bug
response_text = response.lower()
bug_indicators = [
"dictionary",
"iteration",
"modify",
"concurrent",
"active_sessions",
"cleanup",
"del ",
"removing",
]
found_indicators = [indicator for indicator in bug_indicators if indicator in response_text]
if len(found_indicators) >= 3:
self.logger.info(f" ✅ Text response found {len(found_indicators)} relevant indicators")
else:
self.logger.error(f" ❌ Text response only found {len(found_indicators)} relevant indicators")
return False
# Validate logs
self.logger.info(" 📋 Validating execution logs...")
# Get server logs from the actual log file inside the container
result = self.run_command(
["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True
)
if result.returncode == 0:
logs = result.stdout.decode() + result.stderr.decode()
# Look for debug tool execution patterns
debug_patterns = [
"debug tool",
"[DEBUG]",
"systematic investigation",
"Token budget",
"Essential files for debugging",
]
patterns_found = 0
for pattern in debug_patterns:
if pattern in logs:
patterns_found += 1
self.logger.debug(f" ✅ Found log pattern: {pattern}")
if patterns_found >= 3:
self.logger.info(f" ✅ Log validation passed ({patterns_found}/{len(debug_patterns)} patterns)")
else:
self.logger.warning(f" ⚠️ Only found {patterns_found}/{len(debug_patterns)} log patterns")
else:
self.logger.warning(" ⚠️ Could not retrieve Docker logs")
# Test continuation if available
if continuation_id:
self.logger.info(" 🔄 Testing debug continuation...")
follow_up_response, _ = self.call_mcp_tool(
"debug",
{
"prompt": "Based on your analysis, which bug should we fix first and how?",
"continuation_id": continuation_id,
"model": "flash",
},
)
if follow_up_response:
self.logger.info(" ✅ Debug continuation worked")
else:
self.logger.warning(" ⚠️ Debug continuation failed")
self.logger.info(" ✅ Debug tool validation completed successfully")
return True
except Exception as e:
self.logger.error(f"Debug validation test failed: {e}")
return False
finally:
self.cleanup_test_files()
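For reference, the dictionary-iteration failure mode that the test's indicator lists look for boils down to the following (a hypothetical snippet for illustration, not part of the test suite):

```python
# Deleting from a dict while iterating over it raises in Python 3:
sessions = {"sess_a": "expired", "sess_b": "active"}
try:
    for session_id in sessions:          # iterating over the dict directly...
        if sessions[session_id] == "expired":
            del sessions[session_id]     # ...while mutating it
except RuntimeError as exc:
    print(exc)  # "dictionary changed size during iteration"

# The safe pattern snapshots the expired keys first, then deletes:
sessions = {"sess_a": "expired", "sess_b": "active"}
expired = [s for s, state in sessions.items() if state == "expired"]
for session_id in expired:
    del sessions[session_id]
```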

View File

@@ -4,8 +4,32 @@ Debug tool system prompt
DEBUG_ISSUE_PROMPT = """
ROLE
You are an expert debugger and problem-solver. Analyze errors, trace root causes, and propose the minimal fix required.
Bugs can ONLY be found and fixed from given code. These cannot be made up or imagined.
You are an expert debugging assistant receiving systematic investigation findings from Claude.
Claude has performed methodical investigation work following systematic debugging methodology.
Your role is to provide expert analysis based on Claude's comprehensive investigation.
SYSTEMATIC INVESTIGATION CONTEXT
Claude has followed a systematic investigation approach:
1. Methodical examination of error reports and symptoms
2. Step-by-step code analysis and evidence collection
3. Use of tracer tool for complex method interactions when needed
4. Hypothesis formation and testing against actual code
5. Documentation of findings and investigation evolution
You are receiving:
1. Issue description and original symptoms
2. Claude's systematic investigation findings (comprehensive analysis)
3. Essential files identified as critical for understanding the issue
4. Error context, logs, and diagnostic information
5. Tracer tool analysis results (if complex flow analysis was needed)
TRACER TOOL INTEGRATION AWARENESS
If Claude used the tracer tool during investigation, the findings will include:
- Method call flow analysis
- Class dependency mapping
- Side effect identification
- Execution path tracing
This provides deep understanding of how code interactions contribute to the issue.
CRITICAL LINE NUMBER INSTRUCTIONS
Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -14,33 +38,80 @@ exact positions if needed to point to exact locations. Include a very short code
Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code
snippets.
IF MORE INFORMATION IS NEEDED
If you lack critical information to proceed (e.g., missing files, ambiguous error details,
insufficient context), OR if the provided diagnostics (log files, crash reports, stack traces) appear irrelevant,
incomplete, or insufficient for proper analysis, you MUST respond ONLY with this JSON format (and nothing else).
Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete:
{"status": "clarification_required", "question": "<your brief question>",
"files_needed": ["[file name here]", "[or some folder/]"]}
WORKFLOW CONTEXT
Your task is to analyze Claude's systematic investigation and provide expert debugging analysis back to Claude, who will
then present the findings to the user in a consolidated format.
CRITICAL: Your primary objective is to identify the root cause of the specific issue at hand and suggest the
minimal fix required to resolve it. Stay focused on the main problem - avoid suggesting extensive refactoring,
architectural changes, or unrelated improvements.
STRUCTURED JSON OUTPUT FORMAT
You MUST respond with a properly formatted JSON object following this exact schema.
Do NOT include any text before or after the JSON. The response must be valid JSON only.
SCOPE DISCIPLINE: Address ONLY the reported issue. Do not propose additional optimizations, code cleanup,
or improvements beyond what's needed to fix the specific problem. You are a debug assistant, trying to help identify
the root cause and minimal fix for an issue. Resist the urge to suggest broader changes
even if you notice other potential issues.
IF MORE INFORMATION IS NEEDED:
If you lack critical information to proceed, respond with:
{
"status": "clarification_required",
"question": "<your brief question>",
"files_needed": ["[file name here]", "[or some folder/]"]
}
DEBUGGING STRATEGY:
1. Read and analyze ALL provided files, error messages, logs, and diagnostic information thoroughly
2. Understand any requirements, constraints, or context given in the problem description
3. If any information is incomplete or not enough, you must respond with the JSON format above and nothing else.
4. Correlate diagnostics and any given logs or error statements with code to identify the precise failure point
5. Work backwards from symptoms to find the underlying root cause
6. Focus exclusively on resolving the reported issue with the simplest effective solution
FOR COMPLETE ANALYSIS:
{
"status": "analysis_complete",
"summary": "<brief description of the problem and its impact>",
"investigation_steps": [
"<step 1: what you analyzed first>",
"<step 2: what you discovered next>",
"<step 3: how findings evolved>",
"..."
],
"hypotheses": [
{
"name": "<HYPOTHESIS NAME>",
"confidence": "High|Medium|Low",
"root_cause": "<technical explanation>",
"evidence": "<logs or code clues supporting this hypothesis>",
"correlation": "<how symptoms map to the cause>",
"validation": "<quick test to confirm>",
"minimal_fix": "<smallest change to resolve the issue>",
"regression_check": "<why this fix is safe>",
"file_references": ["<file:line format for exact locations>"],
"function_name": "<optional: specific function/method name if identified>",
"start_line": "<optional: starting line number if specific location identified>",
"end_line": "<optional: ending line number if specific location identified>",
"context_start_text": "<optional: exact text from start line for verification>",
"context_end_text": "<optional: exact text from end line for verification>"
}
],
"key_findings": [
"<finding 1: important discoveries made during analysis>",
"<finding 2: code patterns or issues identified>",
"<finding 3: invalidated assumptions or refined understanding>"
],
"immediate_actions": [
"<action 1: steps to take regardless of which hypothesis is correct>",
"<action 2: additional logging or monitoring needed>"
],
"recommended_tools": [
"<tool recommendation if additional analysis needed, e.g., 'tracer tool for call flow analysis'>"
],
"prevention_strategy": "<optional: targeted measures to prevent this exact issue from recurring>",
"investigation_summary": "<comprehensive summary of the complete investigation process and final conclusions>"
}
Your debugging approach should generate focused hypotheses ranked by likelihood, with emphasis on identifying
the exact root cause and implementing minimal, targeted fixes.
CRITICAL DEBUGGING PRINCIPLES:
1. Bugs can ONLY be found and fixed from given code - these cannot be made up or imagined
2. Focus ONLY on the reported issue - avoid suggesting extensive refactoring or unrelated improvements
3. Propose minimal fixes that address the specific problem without introducing regressions
4. Document your investigation process systematically for future reference
5. Rank hypotheses by likelihood based on evidence from the actual code and logs provided
6. Always include specific file:line references for exact locations of issues
PRECISE LOCATION REFERENCES:
When you identify specific code locations for hypotheses, include optional precision fields:
- function_name: The exact function/method name where the issue occurs
- start_line/end_line: Line numbers from the LINE│ markers (for reference ONLY - never include LINE│ in generated code)
- context_start_text/context_end_text: Exact text from those lines for verification
- These fields help Claude locate exact positions for implementing fixes
REGRESSION PREVENTION: Before suggesting any fix, thoroughly analyze the proposed change to ensure it does not
introduce new issues or break existing functionality. Consider:
@@ -48,30 +119,14 @@ introduce new issues or break existing functionality. Consider:
- Whether the fix could impact related features or workflows
- If the solution maintains backward compatibility
- What potential side effects or unintended consequences might occur
Review your suggested changes carefully and validate they solve ONLY the specific issue without causing regressions.
OUTPUT FORMAT
Your debugging approach should generate focused hypotheses ranked by likelihood, with emphasis on identifying
the exact root cause and implementing minimal, targeted fixes while maintaining comprehensive documentation
of the investigation process.
## Summary
Brief description of the problem and its impact.
## Hypotheses (Ranked by Likelihood)
### 1. [HYPOTHESIS NAME] (Confidence: High/Medium/Low)
**Root Cause:** Technical explanation.
**Evidence:** Logs or code clues supporting this hypothesis.
**Correlation:** How symptoms map to the cause.
**Validation:** Quick test to confirm.
**Minimal Fix:** Smallest change to resolve the issue.
**Regression Check:** Why this fix is safe.
### 2. [HYPOTHESIS NAME] (Confidence: …)
[Repeat format as above]
## Immediate Actions
Steps to take regardless of which hypothesis is correct (e.g., extra logging).
## Prevention Strategy
*Provide only if explicitly requested.*
Targeted measures to prevent this exact issue from recurring.
Your analysis should build upon Claude's systematic investigation to provide:
- Expert validation of hypotheses
- Additional insights based on systematic findings
- Specific implementation guidance for fixes
- Regression prevention analysis
"""

View File

@@ -14,17 +14,22 @@ from systemprompts import ANALYZE_PROMPT
from .base import BaseTool, ToolRequest
# Field descriptions to avoid duplication between Pydantic and JSON schema
ANALYZE_FIELD_DESCRIPTIONS = {
"files": "Files or directories to analyze (must be absolute paths)",
"prompt": "What to analyze or look for",
"analysis_type": "Type of analysis to perform",
"output_format": "How to format the output",
}
class AnalyzeRequest(ToolRequest):
"""Request model for analyze tool"""
files: list[str] = Field(..., description="Files or directories to analyze (must be absolute paths)")
prompt: str = Field(..., description="What to analyze or look for")
analysis_type: Optional[str] = Field(
None,
description="Type of analysis: architecture|performance|security|quality|general",
)
output_format: Optional[str] = Field("detailed", description="Output format: summary|detailed|actionable")
files: list[str] = Field(..., description=ANALYZE_FIELD_DESCRIPTIONS["files"])
prompt: str = Field(..., description=ANALYZE_FIELD_DESCRIPTIONS["prompt"])
analysis_type: Optional[str] = Field(None, description=ANALYZE_FIELD_DESCRIPTIONS["analysis_type"])
output_format: Optional[str] = Field("detailed", description=ANALYZE_FIELD_DESCRIPTIONS["output_format"])
class AnalyzeTool(BaseTool):
@@ -50,12 +55,12 @@ class AnalyzeTool(BaseTool):
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Files or directories to analyze (must be absolute paths)",
"description": ANALYZE_FIELD_DESCRIPTIONS["files"],
},
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "What to analyze or look for",
"description": ANALYZE_FIELD_DESCRIPTIONS["prompt"],
},
"analysis_type": {
"type": "string",
@@ -66,13 +71,13 @@ class AnalyzeTool(BaseTool):
"quality",
"general",
],
"description": "Type of analysis to perform",
"description": ANALYZE_FIELD_DESCRIPTIONS["analysis_type"],
},
"output_format": {
"type": "string",
"enum": ["summary", "detailed", "actionable"],
"default": "detailed",
"description": "How to format the output",
"description": ANALYZE_FIELD_DESCRIPTIONS["output_format"],
},
"temperature": {
"type": "number",

View File

@@ -14,31 +14,29 @@ from systemprompts import CHAT_PROMPT
from .base import BaseTool, ToolRequest
class ChatRequest(ToolRequest):
"""Request model for chat tool"""
prompt: str = Field(
...,
description=(
# Field descriptions to avoid duplication between Pydantic and JSON schema
CHAT_FIELD_DESCRIPTIONS = {
"prompt": (
"Your thorough, expressive question with as much context as possible. Remember: you're talking to "
"another Claude assistant who has deep expertise and can provide nuanced insights. Include your "
"current thinking, specific challenges, background context, what you've already tried, and what "
"kind of response would be most helpful. The more context and detail you provide, the more "
"valuable and targeted the response will be."
),
)
files: Optional[list[str]] = Field(
default_factory=list,
description="Optional files for context (must be absolute paths)",
)
images: Optional[list[str]] = Field(
default_factory=list,
description=(
"files": "Optional files for context (must be absolute paths)",
"images": (
"Optional images for visual context. Useful for UI discussions, diagrams, visual problems, "
"error screens, or architectural mockups."
),
)
}
class ChatRequest(ToolRequest):
"""Request model for chat tool"""
prompt: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["prompt"])
files: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["files"])
images: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["images"])
class ChatTool(BaseTool):
@@ -65,26 +63,17 @@ class ChatTool(BaseTool):
"properties": {
"prompt": {
"type": "string",
"description": (
"Your thorough, expressive question with as much context as possible. Remember: you're "
"talking to another Claude assistant who has deep expertise and can provide nuanced "
"insights. Include your current thinking, specific challenges, background context, what "
"you've already tried, and what kind of response would be most helpful. The more context "
"and detail you provide, the more valuable and targeted the response will be."
),
"description": CHAT_FIELD_DESCRIPTIONS["prompt"],
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Optional files for context (must be absolute paths)",
"description": CHAT_FIELD_DESCRIPTIONS["files"],
},
"images": {
"type": "array",
"items": {"type": "string"},
"description": (
"Optional images for visual context. Useful for UI discussions, diagrams, visual "
"problems, error screens, or architectural mockups."
),
"description": CHAT_FIELD_DESCRIPTIONS["images"],
},
"model": self.get_model_field_schema(),
"temperature": {

View File

@@ -23,6 +23,28 @@ from systemprompts import CODEREVIEW_PROMPT
from .base import BaseTool, ToolRequest
# Field descriptions to avoid duplication between Pydantic and JSON schema
CODEREVIEW_FIELD_DESCRIPTIONS = {
"files": "Code files or directories to review (must be absolute paths)",
"prompt": (
"User's summary of what the code does, expected behavior, constraints, and review objectives. "
"IMPORTANT: Before using this tool, Claude should first perform its own preliminary review - "
"examining the code structure, identifying potential issues, understanding the business logic, "
"and noting areas of concern. Include Claude's initial observations about code quality, potential "
"bugs, architectural patterns, and specific areas that need deeper scrutiny. This dual-perspective "
"approach (Claude's analysis + external model's review) provides more comprehensive feedback and "
"catches issues that either reviewer might miss alone."
),
"images": (
"Optional images of architecture diagrams, UI mockups, design documents, or visual references "
"for code review context"
),
"review_type": "Type of review to perform",
"focus_on": "Specific aspects to focus on, or additional context that would help understand areas of concern",
"standards": "Coding standards to enforce",
"severity_filter": "Minimum severity level to report",
}
class CodeReviewRequest(ToolRequest):
"""
@@ -33,39 +55,13 @@ class CodeReviewRequest(ToolRequest):
review focus and standards.
"""
files: list[str] = Field(
...,
description="Code files or directories to review (must be absolute paths)",
)
prompt: str = Field(
...,
description=(
"User's summary of what the code does, expected behavior, constraints, and review objectives. "
"IMPORTANT: Before using this tool, Claude should first perform its own preliminary review - "
"examining the code structure, identifying potential issues, understanding the business logic, "
"and noting areas of concern. Include Claude's initial observations about code quality, potential "
"bugs, architectural patterns, and specific areas that need deeper scrutiny. This dual-perspective "
"approach (Claude's analysis + external model's review) provides more comprehensive feedback and "
"catches issues that either reviewer might miss alone."
),
)
images: Optional[list[str]] = Field(
None,
description=(
"Optional images of architecture diagrams, UI mockups, design documents, or visual references "
"for code review context"
),
)
review_type: str = Field("full", description="Type of review: full|security|performance|quick")
focus_on: Optional[str] = Field(
None,
description=("Specific aspects to focus on, or additional context that would help understand areas of concern"),
)
standards: Optional[str] = Field(None, description="Coding standards or guidelines to enforce")
severity_filter: str = Field(
"all",
description="Minimum severity to report: critical|high|medium|low|all",
)
files: list[str] = Field(..., description=CODEREVIEW_FIELD_DESCRIPTIONS["files"])
prompt: str = Field(..., description=CODEREVIEW_FIELD_DESCRIPTIONS["prompt"])
images: Optional[list[str]] = Field(None, description=CODEREVIEW_FIELD_DESCRIPTIONS["images"])
review_type: str = Field("full", description=CODEREVIEW_FIELD_DESCRIPTIONS["review_type"])
focus_on: Optional[str] = Field(None, description=CODEREVIEW_FIELD_DESCRIPTIONS["focus_on"])
standards: Optional[str] = Field(None, description=CODEREVIEW_FIELD_DESCRIPTIONS["standards"])
severity_filter: str = Field("all", description=CODEREVIEW_FIELD_DESCRIPTIONS["severity_filter"])
class CodeReviewTool(BaseTool):
@@ -103,52 +99,37 @@ class CodeReviewTool(BaseTool):
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Code files or directories to review (must be absolute paths)",
"description": CODEREVIEW_FIELD_DESCRIPTIONS["files"],
},
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": (
"User's summary of what the code does, expected behavior, constraints, and review "
"objectives. IMPORTANT: Before using this tool, Claude should first perform its own "
"preliminary review - examining the code structure, identifying potential issues, "
"understanding the business logic, and noting areas of concern. Include Claude's initial "
"observations about code quality, potential bugs, architectural patterns, and specific "
"areas that need deeper scrutiny. This dual-perspective approach (Claude's analysis + "
"external model's review) provides more comprehensive feedback and catches issues that "
"either reviewer might miss alone."
),
"description": CODEREVIEW_FIELD_DESCRIPTIONS["prompt"],
},
"images": {
"type": "array",
"items": {"type": "string"},
"description": (
"Optional images of architecture diagrams, UI mockups, design documents, or visual "
"references for code review context"
),
"description": CODEREVIEW_FIELD_DESCRIPTIONS["images"],
},
"review_type": {
"type": "string",
"enum": ["full", "security", "performance", "quick"],
"default": "full",
"description": "Type of review to perform",
"description": CODEREVIEW_FIELD_DESCRIPTIONS["review_type"],
},
"focus_on": {
"type": "string",
"description": (
"Specific aspects to focus on, or additional context that would help understand "
"areas of concern"
),
"description": CODEREVIEW_FIELD_DESCRIPTIONS["focus_on"],
},
"standards": {
"type": "string",
"description": "Coding standards to enforce",
"description": CODEREVIEW_FIELD_DESCRIPTIONS["standards"],
},
"severity_filter": {
"type": "string",
"enum": ["critical", "high", "medium", "low", "all"],
"default": "all",
"description": "Minimum severity level to report",
"description": CODEREVIEW_FIELD_DESCRIPTIONS["severity_filter"],
},
"temperature": {
"type": "number",

View File

@@ -14,22 +14,49 @@ from systemprompts import DEBUG_ISSUE_PROMPT
from .base import BaseTool, ToolRequest
# Field descriptions to avoid duplication between Pydantic and JSON schema
DEBUG_FIELD_DESCRIPTIONS = {
"prompt": (
"Issue description. Include what you can provide: "
"error messages, symptoms, when it occurs, steps to reproduce, environment details, "
"recent changes, and any other relevant information. Mention any previous attempts at fixing this issue, "
"including any past fix that was in place but has now regressed. "
"The more context available, the better the analysis. "
"SYSTEMATIC INVESTIGATION: Claude MUST begin by thinking hard and performing a thorough investigation using a systematic approach. "
"First understand the issue, find the code that may be causing it or code that is breaking, as well as any related code that could have caused this as a side effect. "
"Claude MUST maintain detailed investigation notes in a DEBUGGING_{issue_description}.md file within the project folder, "
"updating it as it performs step-by-step analysis of the code, trying to determine the actual root cause and understanding how a minimal, appropriate fix can be found. "
"This file MUST contain functions, methods, files visited OR determined to be part of the problem. Claude MUST update this and remove any references that it finds to be irrelevant during its investigation. "
"Once complete, Claude MUST provide Zen's debug tool with this file passed into the files parameter. "
"It is ESSENTIAL that this detailed work is performed by Claude before sharing all the relevant details with its development assistant. This will greatly help in zeroing in on the root cause."
),
"findings": (
"Claude MUST first perform its own investigation, gather its findings and analysis. Include: steps taken to analyze the issue, "
"code patterns discovered, initial hypotheses formed, any relevant classes/functions/methods examined, "
"and any preliminary conclusions. This provides context for the assistant model's analysis."
),
"files": (
"Essential files for debugging - ONLY include files that are directly related to the issue, "
"contain the problematic code, or are necessary for understanding the root cause. "
"This can include any relevant log files, error description documents, investigation documents, "
"claude's own findings as a document, related code that may help with analysis."
"DO NOT include every file scanned during investigation (must be absolute paths)."
),
"error_context": "Stack trace, snippet from logs, or additional error context. For very large text you MUST instead"
"save the context as a temporary file within the project folder and share it as an absolute file path"
"reference to the files parameter.",
"images": "Optional images showing error screens, UI issues, logs displays, or visual debugging information",
}
class DebugIssueRequest(ToolRequest):
"""Request model for debug tool"""
prompt: str = Field(..., description="Error message, symptoms, or issue description")
error_context: Optional[str] = Field(None, description="Stack trace, logs, or additional error context")
files: Optional[list[str]] = Field(
None,
description="Files or directories that might be related to the issue (must be absolute paths)",
)
images: Optional[list[str]] = Field(
None,
description="Optional images showing error screens, UI issues, logs displays, or visual debugging information",
)
runtime_info: Optional[str] = Field(None, description="Environment, versions, or runtime information")
previous_attempts: Optional[str] = Field(None, description="What has been tried already")
prompt: str = Field(..., description=DEBUG_FIELD_DESCRIPTIONS["prompt"])
findings: Optional[str] = Field(None, description=DEBUG_FIELD_DESCRIPTIONS["findings"])
files: Optional[list[str]] = Field(None, description=DEBUG_FIELD_DESCRIPTIONS["files"])
error_context: Optional[str] = Field(None, description=DEBUG_FIELD_DESCRIPTIONS["error_context"])
images: Optional[list[str]] = Field(None, description=DEBUG_FIELD_DESCRIPTIONS["images"])
class DebugIssueTool(BaseTool):
@@ -40,15 +67,35 @@ class DebugIssueTool(BaseTool):
def get_description(self) -> str:
return (
"DEBUG & ROOT CAUSE ANALYSIS - Expert debugging for complex issues with 1M token capacity. "
"DEBUG & ROOT CAUSE ANALYSIS - Expert debugging for complex issues with systematic investigation support. "
"Use this when you need to debug code, find out why something is failing, identify root causes, "
"trace errors, or diagnose issues. "
"IMPORTANT: Share diagnostic files liberally! The model can handle up to 1M tokens, so include: "
"large log files, full stack traces, memory dumps, diagnostic outputs, multiple related files, "
"entire modules, test results, configuration files - anything that might help debug the issue. "
"Claude should proactively use this tool whenever debugging is needed and share comprehensive "
"file paths rather than snippets. Include error messages, stack traces, logs, and ALL relevant "
"code files as absolute paths. The more context, the better the debugging analysis. "
"SYSTEMATIC INVESTIGATION WORKFLOW: "
"Claude MUST begin by thinking hard and performing a thorough investigation using a systematic approach. "
"First understand the issue, find the code that may be causing it or code that is breaking, as well as any related code that could have caused this as a side effect. "
"Claude MUST maintain detailed investigation notes while it performs its analysis, "
"updating it as it performs step-by-step analysis of the code, trying to determine the actual root cause and understanding how a minimal, appropriate fix can be found. "
"This file MUST contain functions, methods, files visited OR determined to be part of the problem. Claude MUST update this and remove any references that it finds to be irrelevant during its investigation. "
"Once complete, Claude MUST provide Zen's debug tool with this file passed into the files parameter. "
"1. INVESTIGATE SYSTEMATICALLY: Claude MUST think and use a methodical approach to trace through error reports, "
"examine code, and gather evidence step by step "
"2. DOCUMENT FINDINGS: Maintain detailed investigation notes to "
"keep the user informed during its initial investigation. This investigation MUST be shared with this tool for the assistant "
"to be able to help more effectively. "
"3. USE TRACER TOOL: For complex method calls, class references, or side effects use Zen's tracer tool and include its output as part of the "
"prompt or additional context "
"4. COLLECT EVIDENCE: Document important discoveries and validation attempts "
"5. PROVIDE COMPREHENSIVE FINDINGS: Pass complete findings to this tool for expert analysis "
"INVESTIGATION METHODOLOGY: "
"- Start with error messages/symptoms and work backwards to root cause "
"- Examine code flow and identify potential failure points "
"- Use tracer tool for complex method interactions and dependencies if and as needed but continue with the investigation after using it "
"- Test hypotheses against actual code and logs and confirm the idea holds "
"- Document everything systematically "
"ESSENTIAL FILES ONLY: Include only files (documents, code etc) directly related to the issue. "
"Focus on quality over quantity for assistant model analysis. "
"STRUCTURED OUTPUT: Assistant models return JSON responses with hypothesis "
"ranking, evidence correlation, and actionable fixes. "
"Choose thinking_mode based on issue complexity: 'low' for simple errors, "
"'medium' for standard debugging (default), 'high' for complex system issues, "
"'max' for extremely challenging bugs requiring deepest analysis. "
@@ -61,30 +108,26 @@ class DebugIssueTool(BaseTool):
"properties": {
"prompt": {
"type": "string",
"description": "Error message, symptoms, or issue description",
"description": DEBUG_FIELD_DESCRIPTIONS["prompt"],
},
"model": self.get_model_field_schema(),
"error_context": {
"findings": {
"type": "string",
"description": "Stack trace, logs, or additional error context",
"description": DEBUG_FIELD_DESCRIPTIONS["findings"],
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Files or directories that might be related to the issue (must be absolute paths)",
"description": DEBUG_FIELD_DESCRIPTIONS["files"],
},
"error_context": {
"type": "string",
"description": DEBUG_FIELD_DESCRIPTIONS["error_context"],
},
"images": {
"type": "array",
"items": {"type": "string"},
"description": "Optional images showing error screens, UI issues, logs displays, or visual debugging information",
},
"runtime_info": {
"type": "string",
"description": "Environment, versions, or runtime information",
},
"previous_attempts": {
"type": "string",
"description": "What has been tried already",
"description": DEBUG_FIELD_DESCRIPTIONS["images"],
},
"temperature": {
"type": "number",
@@ -164,15 +207,12 @@ class DebugIssueTool(BaseTool):
# Build context sections
context_parts = [f"=== ISSUE DESCRIPTION ===\n{request.prompt}\n=== END DESCRIPTION ==="]
if request.findings:
context_parts.append(f"\n=== CLAUDE'S INVESTIGATION FINDINGS ===\n{request.findings}\n=== END FINDINGS ===")
if request.error_context:
context_parts.append(f"\n=== ERROR CONTEXT/STACK TRACE ===\n{request.error_context}\n=== END CONTEXT ===")
if request.runtime_info:
context_parts.append(f"\n=== RUNTIME INFORMATION ===\n{request.runtime_info}\n=== END RUNTIME ===")
if request.previous_attempts:
context_parts.append(f"\n=== PREVIOUS ATTEMPTS ===\n{request.previous_attempts}\n=== END ATTEMPTS ===")
# Add relevant files if provided
if request.files:
# Use centralized file processing logic
@@ -183,7 +223,9 @@ class DebugIssueTool(BaseTool):
self._actually_processed_files = processed_files
if file_content:
context_parts.append(f"\n=== RELEVANT CODE ===\n{file_content}\n=== END CODE ===")
context_parts.append(
f"\n=== ESSENTIAL FILES FOR DEBUGGING ===\n{file_content}\n=== END ESSENTIAL FILES ==="
)
full_context = "\n".join(context_parts)
@@ -211,15 +253,55 @@ Focus on finding the root cause and providing actionable solutions."""
return full_prompt
def format_response(self, response: str, request: DebugIssueRequest, model_info: Optional[dict] = None) -> str:
"""Format the debugging response"""
# Get the friendly model name
model_name = "the model"
def _get_model_name(self, model_info: Optional[dict]) -> str:
"""Extract friendly model name from model info."""
if model_info and model_info.get("model_response"):
model_name = model_info["model_response"].friendly_name or "the model"
return model_info["model_response"].friendly_name or "the model"
return "the model"
def _generate_systematic_next_steps(self, model_name: str) -> str:
"""Generate next steps for systematic investigation completion."""
return f"""**Expert Analysis Complete**
{model_name} has analyzed your systematic investigation findings.
**Next Steps:**
1. **UPDATE INVESTIGATION DOCUMENT**: Add the expert analysis to your DEBUGGING_*.md file
2. **REVIEW HYPOTHESES**: Examine the ranked hypotheses and evidence validation
3. **IMPLEMENT FIXES**: Apply recommended minimal fixes in order of likelihood
4. **VALIDATE CHANGES**: Test each fix thoroughly to ensure no regressions
5. **DOCUMENT RESOLUTION**: Update investigation document with final resolution"""
def _generate_standard_analysis_steps(self, model_name: str) -> str:
"""Generate next steps for standard analysis completion."""
return f"""**Expert Analysis Complete**
{model_name} has analyzed your investigation findings.
**Next Steps:**
1. **REVIEW HYPOTHESES**: Examine the ranked hypotheses and evidence
2. **IMPLEMENT FIXES**: Apply recommended minimal fixes in order of likelihood
3. **VALIDATE CHANGES**: Test each fix thoroughly to ensure no regressions"""
def _generate_general_analysis_steps(self, model_name: str) -> str:
"""Generate next steps for general analysis responses."""
return f"""**Analysis from {model_name}**
**Next Steps:** Continue your systematic investigation based on the guidance provided, then return
with comprehensive findings for expert analysis."""
def format_response(self, response: str, request: DebugIssueRequest, model_info: Optional[dict] = None) -> str:
"""Format the debugging response for Claude to present to user"""
# The base class automatically handles structured responses like 'clarification_required'
# and 'analysis_complete' via SPECIAL_STATUS_MODELS, so we only handle normal text responses here
model_name = self._get_model_name(model_info)
# For normal text responses, provide general guidance
next_steps = self._generate_general_analysis_steps(model_name)
return f"""{response}
---
**Next Steps:** Evaluate {model_name}'s recommendations, synthesize the best fix considering potential regressions, and if the root cause has been clearly identified, proceed with implementing the potential fixes."""
{next_steps}"""

View File

@@ -284,20 +284,6 @@ class TraceComplete(BaseModel):
state_access: Optional[list[StateAccess]] = Field(default_factory=list, description="State access information")
# Registry mapping status strings to their corresponding Pydantic models
SPECIAL_STATUS_MODELS = {
"clarification_required": ClarificationRequest,
"full_codereview_required": FullCodereviewRequired,
"focused_review_required": FocusedReviewRequired,
"test_sample_needed": TestSampleNeeded,
"more_tests_required": MoreTestsRequired,
"refactor_analysis_complete": RefactorAnalysisComplete,
"trace_complete": TraceComplete,
"resend_prompt": ResendPromptRequest,
"code_too_large": CodeTooLargeRequest,
}
class DiagnosticHypothesis(BaseModel):
"""A debugging hypothesis with context and next steps"""
@@ -321,3 +307,51 @@ class StructuredDebugResponse(BaseModel):
default_factory=list,
description="Additional files or information that would help with analysis",
)
class DebugHypothesis(BaseModel):
"""A debugging hypothesis with detailed analysis"""
name: str = Field(..., description="Name/title of the hypothesis")
confidence: Literal["High", "Medium", "Low"] = Field(..., description="Confidence level")
root_cause: str = Field(..., description="Technical explanation of the root cause")
evidence: str = Field(..., description="Logs or code clues supporting this hypothesis")
correlation: str = Field(..., description="How symptoms map to the cause")
validation: str = Field(..., description="Quick test to confirm the hypothesis")
minimal_fix: str = Field(..., description="Smallest change to resolve the issue")
regression_check: str = Field(..., description="Why this fix is safe")
file_references: list[str] = Field(default_factory=list, description="File:line format for exact locations")
class DebugAnalysisComplete(BaseModel):
"""Complete debugging analysis with systematic investigation tracking"""
status: Literal["analysis_complete"] = "analysis_complete"
investigation_id: str = Field(..., description="Auto-generated unique ID for this investigation")
summary: str = Field(..., description="Brief description of the problem and its impact")
investigation_steps: list[str] = Field(..., description="Steps taken during the investigation")
hypotheses: list[DebugHypothesis] = Field(..., description="Ranked hypotheses with detailed analysis")
key_findings: list[str] = Field(..., description="Important discoveries made during analysis")
immediate_actions: list[str] = Field(..., description="Steps to take regardless of which hypothesis is correct")
recommended_tools: list[str] = Field(default_factory=list, description="Additional tools recommended for analysis")
prevention_strategy: Optional[str] = Field(
None, description="Targeted measures to prevent this exact issue from recurring"
)
investigation_summary: str = Field(
..., description="Comprehensive summary of the complete investigation process and conclusions"
)
# Registry mapping status strings to their corresponding Pydantic models
SPECIAL_STATUS_MODELS = {
"clarification_required": ClarificationRequest,
"full_codereview_required": FullCodereviewRequired,
"focused_review_required": FocusedReviewRequired,
"test_sample_needed": TestSampleNeeded,
"more_tests_required": MoreTestsRequired,
"refactor_analysis_complete": RefactorAnalysisComplete,
"trace_complete": TraceComplete,
"resend_prompt": ResendPromptRequest,
"code_too_large": CodeTooLargeRequest,
"analysis_complete": DebugAnalysisComplete,
}
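For context, a registry like this is typically consumed by looking up the status string and validating against the matching model. A hypothetical helper (not code from this commit; assumes the pydantic v2 API):

```python
import json
from typing import Optional

from pydantic import BaseModel, ValidationError

def parse_special_status(raw: str) -> Optional[BaseModel]:
    """Return a validated status model, or None for plain-text responses."""
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        return None  # not JSON: treat as an ordinary text response
    model_cls = SPECIAL_STATUS_MODELS.get(data.get("status"))
    if model_cls is None:
        return None  # unknown status: fall back to plain-text handling
    try:
        # pydantic v2 API; v1 would use model_cls.parse_obj(data)
        return model_cls.model_validate(data)  # e.g. DebugAnalysisComplete
    except ValidationError:
        return None
```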

View File

@@ -30,6 +30,19 @@ from .base import BaseTool, ToolRequest
logger = logging.getLogger(__name__)
# Field descriptions to avoid duplication between Pydantic and JSON schema
REFACTOR_FIELD_DESCRIPTIONS = {
"files": "Code files or directories to analyze for refactoring opportunities (must be absolute paths)",
"prompt": "Description of refactoring goals, context, and specific areas of focus",
"refactor_type": "Type of refactoring analysis to perform",
"focus_areas": "Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')",
"style_guide_examples": (
"Optional existing code files to use as style/pattern reference (must be absolute paths). "
"These files represent the target coding style and patterns for the project."
),
}
class RefactorRequest(ToolRequest):
"""
Request model for the refactor tool.
@@ -38,28 +51,14 @@ class RefactorRequest(ToolRequest):
the refactoring analysis process.
"""
files: list[str] = Field(
...,
description="Code files or directories to analyze for refactoring opportunities (must be absolute paths)",
)
prompt: str = Field(
...,
description="Description of refactoring goals, context, and specific areas of focus",
)
files: list[str] = Field(..., description=REFACTOR_FIELD_DESCRIPTIONS["files"])
prompt: str = Field(..., description=REFACTOR_FIELD_DESCRIPTIONS["prompt"])
refactor_type: Literal["codesmells", "decompose", "modernize", "organization"] = Field(
..., description="Type of refactoring analysis to perform"
)
focus_areas: Optional[list[str]] = Field(
None,
description="Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')",
..., description=REFACTOR_FIELD_DESCRIPTIONS["refactor_type"]
)
focus_areas: Optional[list[str]] = Field(None, description=REFACTOR_FIELD_DESCRIPTIONS["focus_areas"])
style_guide_examples: Optional[list[str]] = Field(
None,
description=(
"Optional existing code files to use as style/pattern reference (must be absolute paths). "
"These files represent the target coding style and patterns for the project. "
"Particularly useful for 'modernize' and 'organization' refactor types."
),
None, description=REFACTOR_FIELD_DESCRIPTIONS["style_guide_examples"]
)
@@ -92,30 +91,27 @@ class RefactorTool(BaseTool):
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Code files or directories to analyze for refactoring opportunities (must be absolute paths)",
"description": REFACTOR_FIELD_DESCRIPTIONS["files"],
},
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "Description of refactoring goals, context, and specific areas of focus",
"description": REFACTOR_FIELD_DESCRIPTIONS["prompt"],
},
"refactor_type": {
"type": "string",
"enum": ["codesmells", "decompose", "modernize", "organization"],
"description": "Type of refactoring analysis to perform",
"description": REFACTOR_FIELD_DESCRIPTIONS["refactor_type"],
},
"focus_areas": {
"type": "array",
"items": {"type": "string"},
"description": "Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')",
"description": REFACTOR_FIELD_DESCRIPTIONS["focus_areas"],
},
"style_guide_examples": {
"type": "array",
"items": {"type": "string"},
"description": (
"Optional existing code files to use as style/pattern reference (must be absolute paths). "
"These files represent the target coding style and patterns for the project."
),
"description": REFACTOR_FIELD_DESCRIPTIONS["style_guide_examples"],
},
"thinking_mode": {
"type": "string",