From 4b202f5d1d24cea1394adab26a976188f847bd09 Mon Sep 17 00:00:00 2001 From: Fahad Date: Fri, 22 Aug 2025 09:23:59 +0400 Subject: [PATCH] feat: refactored and tweaked model descriptions / schema to use fewer tokens at launch (average reduction per field description: 60-80%) without sacrificing tool effectiveness Disabled secondary tools by default (for new installations), updated README.md with instructions on how to enable these in .env run-server.sh now displays disabled / enabled tools (when DISABLED_TOOLS is set) --- .env.example | 15 +++-- README.md | 80 +++++++++++++++++++++++++-- run-server.sh | 70 +++++++++++++++++++++++ tests/test_challenge.py | 4 +- tests/test_chat_simple.py | 2 +- tests/test_consensus.py | 2 +- tests/test_debug.py | 2 +- tests/test_listmodels.py | 2 +- tests/test_planner.py | 2 +- tests/test_precommit_workflow.py | 4 +- tests/test_refactor.py | 6 +- tests/test_secaudit.py | 8 +-- tests/test_tools.py | 6 +- tests/test_tracer.py | 4 +- tools/analyze.py | 54 ++++++------------ tools/challenge.py | 2 +- tools/chat.py | 40 ++++---------- tools/codereview.py | 92 ++++++++----------------------- tools/consensus.py | 17 ++---- tools/debug.py | 88 ++++++++--------------------- tools/docgen.py | 48 +++++----------- tools/listmodels.py | 4 +- tools/planner.py | 7 +-- tools/precommit.py | 95 ++++++++------------------------ tools/refactor.py | 65 +++++++--------------- tools/secaudit.py | 85 +++++++--------------------- tools/shared/base_models.py | 61 ++++++++++---------- tools/testgen.py | 35 ++++-------- tools/thinkdeep.py | 64 +++++++-------------- tools/tracer.py | 50 +++++------------ tools/version.py | 4 +- 31 files changed, 409 insertions(+), 609 deletions(-) diff --git a/.env.example b/.env.example index 6acf4cd..58d0037 100644 --- a/.env.example +++ b/.env.example @@ -149,11 +149,16 @@ LOG_LEVEL=DEBUG # Comma-separated list of tools to disable. If not set, all tools are enabled. # Essential tools (version, listmodels) cannot be disabled. # Available tools: chat, thinkdeep, planner, consensus, codereview, precommit, -# debug, docgen, analyze, refactor, tracer, testgen -# Examples: -# DISABLED_TOOLS= # All tools enabled (default) -# DISABLED_TOOLS=debug,tracer # Disable debug and tracer tools -# DISABLED_TOOLS=planner,consensus # Disable planning tools +# debug, docgen, analyze, refactor, tracer, testgen, challenge, secaudit +# +# DEFAULT CONFIGURATION: To optimize context window usage, non-essential tools +# are disabled by default. Only the essential tools remain enabled: +# - chat, thinkdeep, planner, consensus (collaboration tools) +# - codereview, precommit, debug (code quality tools) +# - challenge (critical thinking utility) +# +# To enable additional tools, remove them from the DISABLED_TOOLS list below. +DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer # Optional: Language/Locale for AI responses # When set, all AI tools will respond in the specified language diff --git a/README.md b/README.md index 9c622d4..699759b 100644 --- a/README.md +++ b/README.md @@ -125,19 +125,21 @@ cd zen-mcp-server ## Core Tools -**Collaboration & Planning** +> **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them. 
+ +**Collaboration & Planning** *(Enabled by default)* - **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches - **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives - **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans - **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering **Code Analysis & Quality** -- **[`analyze`](docs/tools/analyze.md)** - Understand architecture, patterns, dependencies across entire codebases -- **[`codereview`](docs/tools/codereview.md)** - Professional reviews with severity levels and actionable feedback - **[`debug`](docs/tools/debug.md)** - Systematic investigation and root cause analysis - **[`precommit`](docs/tools/precommit.md)** - Validate changes before committing, prevent regressions +- **[`codereview`](docs/tools/codereview.md)** - Professional reviews with severity levels and actionable feedback +- **[`analyze`](docs/tools/analyze.md)** *(disabled by default - [enable](#tool-configuration))* - Understand architecture, patterns, dependencies across entire codebases -**Development Tools** +**Development Tools** *(Disabled by default - [enable](#tool-configuration))* - **[`refactor`](docs/tools/refactor.md)** - Intelligent code refactoring with decomposition focus - **[`testgen`](docs/tools/testgen.md)** - Comprehensive test generation with edge cases - **[`secaudit`](docs/tools/secaudit.md)** - Security audits with OWASP Top 10 analysis @@ -145,9 +147,75 @@ cd zen-mcp-server **Utilities** - **[`challenge`](docs/tools/challenge.md)** - Prevent "You're absolutely right!" responses with critical analysis -- **[`tracer`](docs/tools/tracer.md)** - Static analysis prompts for call-flow mapping +- **[`tracer`](docs/tools/tracer.md)** *(disabled by default - [enable](#tool-configuration))* - Static analysis prompts for call-flow mapping -👉 **[Complete Tools Reference](docs/tools/)** with examples, parameters, and workflows +
+👉 Tool Configuration + +### Default Configuration + +To optimize context window usage, only essential tools are enabled by default: + +**Enabled by default:** +- `chat`, `thinkdeep`, `planner`, `consensus` - Core collaboration tools +- `codereview`, `precommit`, `debug` - Essential code quality tools +- `challenge` - Critical thinking utility + +**Disabled by default:** +- `analyze`, `refactor`, `testgen`, `secaudit`, `docgen`, `tracer` + +### Enabling Additional Tools + +To enable additional tools, remove them from the `DISABLED_TOOLS` list: + +**Option 1: Edit your .env file** +```bash +# Default configuration (from .env.example) +DISABLED_TOOLS=analyze,refactor,testgen,secaudit,docgen,tracer + +# To enable specific tools, remove them from the list +# Example: Enable analyze tool +DISABLED_TOOLS=refactor,testgen,secaudit,docgen,tracer + +# To enable ALL tools +DISABLED_TOOLS= +``` + +**Option 2: Configure in MCP settings** +```json +// In ~/.claude/settings.json or .mcp.json +{ + "mcpServers": { + "zen": { + "env": { + // Enable analyze by removing it from disabled list + "DISABLED_TOOLS": "refactor,testgen,secaudit,docgen,tracer" + } + } + } +} +``` + +**Option 3: Enable all tools** +```json +// Remove or empty the DISABLED_TOOLS to enable everything +{ + "mcpServers": { + "zen": { + "env": { + "DISABLED_TOOLS": "" + } + } + } +} +``` + +**Note:** +- Essential tools (`version`, `listmodels`) cannot be disabled +- After changing tool configuration, restart your Claude session for changes to take effect +- Each tool adds to context window usage, so only enable what you need + +
## Key Features diff --git a/run-server.sh b/run-server.sh index de66be8..9dc263b 100755 --- a/run-server.sh +++ b/run-server.sh @@ -1546,6 +1546,76 @@ display_setup_instructions() { printf '%*s\n' "$((${#setup_header} + 12))" | tr ' ' '=' echo "" print_success "Zen is ready to use!" + + # Display enabled/disabled tools if DISABLED_TOOLS is configured + if [[ -n "${DISABLED_TOOLS:-}" ]]; then + echo "" + print_info "Tool Configuration:" + + # Dynamically discover all available tools from the tools directory + # Excludes: __pycache__, shared modules, models.py, listmodels.py, version.py + local all_tools=() + for tool_file in tools/*.py; do + if [[ -f "$tool_file" ]]; then + local tool_name=$(basename "$tool_file" .py) + # Skip non-tool files + if [[ "$tool_name" != "models" && "$tool_name" != "listmodels" && "$tool_name" != "version" && "$tool_name" != "__init__" ]]; then + all_tools+=("$tool_name") + fi + fi + done + + # Convert DISABLED_TOOLS to array + IFS=',' read -ra disabled_array <<< "$DISABLED_TOOLS" + + # Trim whitespace from disabled tools + local disabled_tools=() + for tool in "${disabled_array[@]}"; do + disabled_tools+=("$(echo "$tool" | xargs)") + done + + # Determine enabled tools + local enabled_tools=() + for tool in "${all_tools[@]}"; do + local is_disabled=false + for disabled in "${disabled_tools[@]}"; do + if [[ "$tool" == "$disabled" ]]; then + is_disabled=true + break + fi + done + if [[ "$is_disabled" == false ]]; then + enabled_tools+=("$tool") + fi + done + + # Display enabled tools + echo "" + echo -e " ${GREEN}Enabled Tools (${#enabled_tools[@]}):${NC}" + local enabled_list="" + for tool in "${enabled_tools[@]}"; do + if [[ -n "$enabled_list" ]]; then + enabled_list+=", " + fi + enabled_list+="$tool" + done + echo " $enabled_list" + + # Display disabled tools + echo "" + echo -e " ${YELLOW}Disabled Tools (${#disabled_tools[@]}):${NC}" + local disabled_list="" + for tool in "${disabled_tools[@]}"; do + if [[ -n "$disabled_list" ]]; then + disabled_list+=", " + fi + disabled_list+="$tool" + done + echo " $disabled_list" + + echo "" + echo " To enable more tools, edit the DISABLED_TOOLS variable in .env" + fi } # ---------------------------------------------------------------------------- diff --git a/tests/test_challenge.py b/tests/test_challenge.py index 7bbe27e..b81f2e4 100644 --- a/tests/test_challenge.py +++ b/tests/test_challenge.py @@ -24,8 +24,8 @@ class TestChallengeTool: def test_tool_metadata(self): """Test that tool metadata matches requirements""" assert self.tool.get_name() == "challenge" - assert "prevent reflexive agreement" in self.tool.get_description() - assert "think critically" in self.tool.get_description() + assert "reflexive agreement" in self.tool.get_description() + assert "critical thinking" in self.tool.get_description() assert "thoughtful evaluation" in self.tool.get_description() assert self.tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL diff --git a/tests/test_chat_simple.py b/tests/test_chat_simple.py index c0cdc78..20b98c6 100644 --- a/tests/test_chat_simple.py +++ b/tests/test_chat_simple.py @@ -22,7 +22,7 @@ class TestChatTool: def test_tool_metadata(self): """Test that tool metadata matches requirements""" assert self.tool.get_name() == "chat" - assert "GENERAL CHAT & COLLABORATIVE THINKING" in self.tool.get_description() + assert "collaborative thinking" in self.tool.get_description() assert self.tool.get_system_prompt() is not None assert self.tool.get_default_temperature() > 0 assert 
self.tool.get_model_category() is not None diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 39d0726..dc1a12e 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -18,7 +18,7 @@ class TestConsensusTool: tool = ConsensusTool() assert tool.get_name() == "consensus" - assert "COMPREHENSIVE CONSENSUS WORKFLOW" in tool.get_description() + assert "consensus" in tool.get_description() assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is False # Consensus manages its own models diff --git a/tests/test_debug.py b/tests/test_debug.py index 4290cbc..18c3ac4 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -14,7 +14,7 @@ class TestDebugTool: tool = DebugIssueTool() assert tool.get_name() == "debug" - assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description() + assert "debugging and root cause analysis" in tool.get_description() assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is True diff --git a/tests/test_listmodels.py b/tests/test_listmodels.py index 212672c..89f5f42 100644 --- a/tests/test_listmodels.py +++ b/tests/test_listmodels.py @@ -21,7 +21,7 @@ class TestListModelsTool: def test_tool_metadata(self, tool): """Test tool has correct metadata""" assert tool.name == "listmodels" - assert "LIST AVAILABLE MODELS" in tool.description + assert "model providers" in tool.description assert tool.get_request_model().__name__ == "ToolRequest" @pytest.mark.asyncio diff --git a/tests/test_planner.py b/tests/test_planner.py index 83ecffd..3464f30 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -18,7 +18,7 @@ class TestPlannerTool: tool = PlannerTool() assert tool.get_name() == "planner" - assert "SEQUENTIAL PLANNER" in tool.get_description() + assert "sequential planning" in tool.get_description() assert tool.get_default_temperature() == 0.5 # TEMPERATURE_BALANCED assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.get_default_thinking_mode() == "medium" diff --git a/tests/test_precommit_workflow.py b/tests/test_precommit_workflow.py index a6b5ebd..a28c17a 100644 --- a/tests/test_precommit_workflow.py +++ b/tests/test_precommit_workflow.py @@ -22,8 +22,8 @@ class TestPrecommitWorkflowTool: tool = PrecommitTool() assert tool.get_name() == "precommit" - assert "COMPREHENSIVE PRECOMMIT WORKFLOW" in tool.get_description() - assert "Step-by-step pre-commit validation" in tool.get_description() + assert "git changes" in tool.get_description() + assert "systematic analysis" in tool.get_description() def test_tool_model_category(self): """Test that precommit tool uses extended reasoning category""" diff --git a/tests/test_refactor.py b/tests/test_refactor.py index 8eb6f47..b14eb2d 100644 --- a/tests/test_refactor.py +++ b/tests/test_refactor.py @@ -68,11 +68,11 @@ class TestRefactorTool: def test_get_description(self, refactor_tool): """Test that the tool returns a comprehensive description""" description = refactor_tool.get_description() - assert "COMPREHENSIVE REFACTORING WORKFLOW" in description + assert "refactoring" in description assert "code smell detection" in description assert "decomposition planning" in description - assert "modernization opportunities" in description - assert "organization improvements" in description + assert "modernization" in description + 
assert "maintainability improvements" in description def test_get_input_schema(self, refactor_tool): """Test that the input schema includes all required workflow fields""" diff --git a/tests/test_secaudit.py b/tests/test_secaudit.py index 12eef8a..c7e1624 100644 --- a/tests/test_secaudit.py +++ b/tests/test_secaudit.py @@ -16,7 +16,7 @@ class TestSecauditTool: tool = SecauditTool() assert tool.get_name() == "secaudit" - assert "COMPREHENSIVE SECURITY AUDIT" in tool.get_description() + assert "security audit" in tool.get_description() assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is True @@ -312,9 +312,9 @@ class TestSecauditTool: # Verify field descriptions are comprehensive assert "OWASP Top 10" in fields["step"] - assert "security implications" in fields["step"] - assert "threat vectors" in fields["step"] - assert "application context" in fields["security_scope"] + assert "OWASP Top 10" in fields["step"] + assert "MANDATORY" in fields["step"] + assert "Security context" in fields["security_scope"] assert "threat level" in fields["threat_level"] assert "compliance frameworks" in fields["compliance_requirements"] diff --git a/tests/test_tools.py b/tests/test_tools.py index 45dbe7a..b2e6cdc 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -19,7 +19,7 @@ class TestThinkDeepTool: def test_tool_metadata(self, tool): """Test tool metadata""" assert tool.get_name() == "thinkdeep" - assert "COMPREHENSIVE INVESTIGATION & REASONING" in tool.get_description() + assert "investigation and reasoning" in tool.get_description() assert tool.get_default_temperature() == 0.7 schema = tool.get_input_schema() @@ -120,7 +120,7 @@ class TestCodeReviewTool: def test_tool_metadata(self, tool): """Test tool metadata""" assert tool.get_name() == "codereview" - assert "COMPREHENSIVE CODE REVIEW" in tool.get_description() + assert "code review" in tool.get_description() assert tool.get_default_temperature() == 0.2 schema = tool.get_input_schema() @@ -213,7 +213,7 @@ class TestAnalyzeTool: def test_tool_metadata(self, tool): """Test tool metadata""" assert tool.get_name() == "analyze" - assert "COMPREHENSIVE ANALYSIS WORKFLOW" in tool.get_description() + assert "code analysis" in tool.get_description() assert tool.get_default_temperature() == 0.2 schema = tool.get_input_schema() diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 4c82e8f..7e7948d 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -23,10 +23,10 @@ class TestTracerTool: def test_get_description(self, tracer_tool): """Test that the tool returns a comprehensive description""" description = tracer_tool.get_description() - assert "STEP-BY-STEP CODE TRACING WORKFLOW" in description + assert "code tracing" in description assert "precision" in description assert "dependencies" in description - assert "guided investigation" in description + assert "systematic" in description def test_get_input_schema(self, tracer_tool): """Test that the input schema includes required fields""" diff --git a/tools/analyze.py b/tools/analyze.py index ec68132..ce2b6ce 100644 --- a/tools/analyze.py +++ b/tools/analyze.py @@ -35,11 +35,9 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for analyze workflow ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "What to analyze or look for in this step. 
In step 1, describe what you want to analyze and begin forming " - "an analytical approach after thinking carefully about what needs to be examined. Consider code quality, " - "performance implications, architectural patterns, and design decisions. Map out the codebase structure, " - "understand the business logic, and identify areas requiring deeper analysis. In later steps, continue " - "exploring with precision and adapt your understanding as you uncover more insights." + "The analysis plan. Step 1: State your strategy, including how you will map the codebase structure, " + "understand business logic, and assess code quality, performance implications, and architectural patterns. " + "Later steps: Report findings and adapt the approach as new insights emerge." ), "step_number": ( "The index of the current step in the analysis sequence, beginning at 1. Each step should build upon or " @@ -54,45 +52,29 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = { "analysis is complete and ready for expert validation." ), "findings": ( - "Summarize everything discovered in this step about the code being analyzed. Include analysis of architectural " - "patterns, design decisions, tech stack assessment, scalability characteristics, performance implications, " - "maintainability factors, security posture, and strategic improvement opportunities. Be specific and avoid " - "vague language—document what you now know about the codebase and how it affects your assessment. " - "IMPORTANT: Document both strengths (good patterns, solid architecture, well-designed components) and " - "concerns (tech debt, scalability risks, overengineering, unnecessary complexity). In later steps, confirm " - "or update past findings with additional evidence." + "Summary of discoveries from this step, including architectural patterns, tech stack assessment, scalability characteristics, " + "performance implications, maintainability factors, and strategic improvement opportunities. " + "IMPORTANT: Document both strengths (good patterns, solid architecture) and concerns (tech debt, overengineering, unnecessary complexity). " + "In later steps, confirm or update past findings with additional evidence." ), "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the analysis " - "investigation so far. Include even files ruled out or found to be unrelated, as this tracks your " - "exploration path." + "List all files examined (absolute paths). Include even ruled-out files to track exploration path." ), "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain code directly relevant to the analysis or " - "contain significant patterns, architectural decisions, or examples worth highlighting. Only list those that are " - "directly tied to important findings, architectural insights, performance characteristics, or strategic " - "improvement opportunities. This could include core implementation files, configuration files, or files " - "demonstrating key patterns." + "Subset of files_checked directly relevant to analysis findings (absolute paths). Include files with " + "significant patterns, architectural decisions, or strategic improvement opportunities." ), "relevant_context": ( - "List methods, functions, classes, or modules that are central to the analysis findings, in the format " - "'ClassName.methodName', 'functionName', or 'module.ClassName'. 
Prioritize those that demonstrate important " - "patterns, represent key architectural decisions, show performance characteristics, or highlight strategic " - "improvement opportunities." - ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." + "List methods/functions central to analysis findings, in 'ClassName.methodName' or 'functionName' format. " + "Prioritize those demonstrating key patterns, architectural decisions, or improvement opportunities." ), + "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."), "images": ( - "Optional list of absolute paths to architecture diagrams, design documents, or visual references " - "that help with analysis context. Only include if they materially assist understanding or assessment." + "Optional absolute paths to architecture diagrams or visual references that help with analysis context." ), "confidence": ( - "Your confidence level in the current analysis findings: exploring (early investigation), " - "low (some insights but more needed), medium (solid understanding), high (comprehensive insights), " - "very_high (very comprehensive insights), almost_certain (nearly complete analysis), " - "certain (100% confidence - complete analysis ready for expert validation)" + "Your confidence in the analysis: exploring, low, medium, high, very_high, almost_certain, or certain. " + "'certain' indicates the analysis is complete and ready for validation." ), "analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)", "output_format": "How to format the output (summary, detailed, actionable)", @@ -176,8 +158,8 @@ class AnalyzeTool(WorkflowTool): def get_description(self) -> str: return ( "Performs comprehensive code analysis with systematic investigation and expert validation. " - "Use for architectural assessment, performance evaluation, maintainability review, and pattern detection. " - "Guides through structured analysis of code structure, patterns, and strategic planning." + "Use for architecture, performance, maintainability, and pattern analysis. " + "Guides through structured code review and strategic planning." ) def get_system_prompt(self) -> str: diff --git a/tools/challenge.py b/tools/challenge.py index ea7a74c..2873025 100644 --- a/tools/challenge.py +++ b/tools/challenge.py @@ -55,7 +55,7 @@ class ChallengeTool(SimpleTool): def get_description(self) -> str: return ( "Prevents reflexive agreement when users challenge responses by forcing critical thinking and reasoned analysis. " - "Automatically triggers when users question, disagree with, or challenge previous statements in ongoing conversations. " + "Trigger automatically when users critically question, disagree with, or appear to challenge previous statements in ongoing conversations. " "Promotes truth-seeking over compliance by ensuring thoughtful evaluation rather than automatic agreement." 
        )
diff --git a/tools/chat.py b/tools/chat.py
index 62a33cc..80e6224 100644
--- a/tools/chat.py
+++ b/tools/chat.py
@@ -15,25 +15,19 @@ if TYPE_CHECKING:
 from config import TEMPERATURE_BALANCED
 from systemprompts import CHAT_PROMPT
 
-from tools.shared.base_models import ToolRequest
+from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS, ToolRequest
 
 from .simple.base import SimpleTool
 
 # Field descriptions matching the original Chat tool exactly
 CHAT_FIELD_DESCRIPTIONS = {
     "prompt": (
-        "Provide a thorough, expressive question or idea with maximum context. Include your current thinking, specific "
-        "challenges, background context, what you've tried, and what kind of response would be most helpful. "
-        "The more context and detail you provide, the more valuable and targeted the response will be. "
-        "NOTE: You're talking to a thought-partner who has deep expertise and can provide nuanced insights. "
-        "IMPORTANT: When referring to code, use the files parameter to pass relevant file paths. Use this prompt for "
-        "function/method names (along with line numbers if needed) or tiny code snippets if absolutely necessary to explain "
-        "the issue and to refer to code from the provided paths."
+        "Your question or idea for collaborative thinking. Provide detailed context, including your goal, what you've tried, and any specific challenges. "
+        "CRITICAL: To discuss code, provide file paths using the 'files' parameter instead of pasting large code blocks here."
     ),
     "files": "Absolute full-paths to existing files / folders for context. DO NOT SHORTEN.",
     "images": (
-        "Optional images for visual context. Useful for UI discussions, diagrams, visual problems, "
-        "error screens, or architectural mockups. (must be FULL absolute paths to real files / folders - DO NOT SHORTEN - OR these can be bas64 data)"
+        "Optional images for visual context (must be FULL absolute paths to real files / folders - DO NOT SHORTEN - OR these can be base64 data)"
    ),
 }
 
@@ -62,9 +56,8 @@ class ChatTool(SimpleTool):
 
     def get_description(self) -> str:
         return (
-            "General chat and collaborative thinking partner for brainstorming, getting second opinions, and exploring ideas. "
-            "Use for bouncing ideas, validating approaches, asking questions, and getting explanations about concepts. "
-            "Perfect for collaborative analysis and general development discussions."
+            "General chat and collaborative thinking partner for brainstorming, development discussion, getting second opinions, and exploring ideas. "
+            "Use for bouncing ideas, validating approaches, asking questions, and getting explanations. "
         )
 
     def get_system_prompt(self) -> str:
@@ -114,36 +107,23 @@ class ChatTool(SimpleTool):
                 "model": self.get_model_field_schema(),
                 "temperature": {
                     "type": "number",
-                    "description": "Response creativity (0-1, default 0.5)",
+                    "description": COMMON_FIELD_DESCRIPTIONS["temperature"],
                     "minimum": 0,
                     "maximum": 1,
                 },
                 "thinking_mode": {
                     "type": "string",
                     "enum": ["minimal", "low", "medium", "high", "max"],
-                    "description": (
-                        "Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), high (67%), "
-                        "max (100% of model max)"
-                    ),
+                    "description": COMMON_FIELD_DESCRIPTIONS["thinking_mode"],
                 },
                 "use_websearch": {
                     "type": "boolean",
-                    "description": (
-                        "Enable web search for documentation, best practices, and current information. 
" - "Particularly useful for: brainstorming sessions, architectural design discussions, " - "exploring industry best practices, working with specific frameworks/technologies, " - "researching solutions to complex problems, or when current documentation and " - "community insights would enhance the analysis." - ), + "description": COMMON_FIELD_DESCRIPTIONS["use_websearch"], "default": True, }, "continuation_id": { "type": "string", - "description": ( - "Thread continuation ID for multi-turn conversations. Can be used to continue " - "conversations across different tools. Only provide this if continuing a previous " - "conversation thread." - ), + "description": COMMON_FIELD_DESCRIPTIONS["continuation_id"], }, }, "required": ["prompt"] + (["model"] if self.is_effective_auto_mode() else []), diff --git a/tools/codereview.py b/tools/codereview.py index a309cf8..af355d0 100644 --- a/tools/codereview.py +++ b/tools/codereview.py @@ -35,82 +35,36 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for code review workflow CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Write your review plan as a technical brief to another engineer. Use direct statements: 'I will examine code structure...' NOT 'Let me examine...'. " - "Step 1: State review strategy and begin forming a systematic approach after thinking carefully about what needs to be analyzed. " - "Later steps: Report findings with precision. " - "MANDATORY: Thoroughly examine code quality, security implications, performance concerns, and architectural patterns. " - "MANDATORY: Consider not only obvious bugs and issues but also subtle concerns like over-engineering, unnecessary complexity, " - "design patterns that could be simplified, areas where architecture might not scale well, missing abstractions, " - "and ways to reduce complexity while maintaining functionality. " - "MANDATORY: Use relevant_files parameter for code files. " - "FORBIDDEN: Large code snippets in this field - use only function/method names when needed." - ), - "step_number": ( - "The index of the current step in the code review sequence, beginning at 1. Each step should build upon or " - "revise the previous one." + "Review plan. Step 1: State strategy. Later: Report findings. " + "MUST examine quality, security, performance, architecture. Use 'relevant_files' for code. NO large snippets." ), + "step_number": "Current step index in review sequence (starts at 1). Build upon previous steps.", "total_steps": ( - "Your current estimate for how many steps will be needed to complete the code review. " - "IMPORTANT: When continuation_id is provided with external validation, set this to 2 maximum " - "(step 1: quick review, step 2: complete). For internal validation continuations, set to 1 as " - "we're not starting a new multi-step investigation." + "Estimated steps needed to complete the review. " + "IMPORTANT: For external validation, max 2 steps. For internal validation, use 1 step. " + "When continuation_id is provided (continuing a previous conversation), set to 2 max for external, 1 for internal." ), "next_step_required": ( - "Set to true if you plan to continue the investigation with another step. False means you believe the " - "code review analysis is complete and ready for expert validation. CRITICAL: For external continuations, " - "set to True on step 1, then False on step 2 to trigger expert analysis. For internal continuations, " - "set to False to complete immediately." 
+ "True to continue with another step, False when review is complete. " + "CRITICAL for external validation: Set to True on step 1, then False on step 2. " + "For internal validation: Set to False immediately. " + "When continuation_id is provided: Follow the same rules based on validation type." ), "findings": ( - "Summarize everything discovered in this step about the code being reviewed. Include analysis of code quality, " - "security concerns, performance issues, architectural patterns, design decisions, potential bugs, code smells, " - "and maintainability considerations. Be specific and avoid vague language—document what you now know about " - "the code and how it affects your assessment. IMPORTANT: Document both positive findings (good patterns, " - "proper implementations, well-designed components) and concerns (potential issues, anti-patterns, security " - "risks, performance bottlenecks). In later steps, confirm or update past findings with additional evidence." + "Discoveries: quality, security, performance, architecture. " + "Document positive+negative. Update in later steps." ), - "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the code review " - "investigation so far. Include even files ruled out or found to be unrelated, as this tracks your " - "exploration path." - ), - "relevant_files": ( - "For when this is the first step, please pass absolute file paths of relevant code to review (do not clip " - "file paths). When used for the final step, this contains a subset of files_checked (as full absolute paths) " - "that contain code directly relevant to the review or contain significant issues, patterns, or examples worth " - "highlighting. Only list those that are directly tied to important findings, security concerns, performance " - "issues, or architectural decisions. This could include core implementation files, configuration files, or " - "files with notable patterns." - ), - "relevant_context": ( - "List methods, functions, classes, or modules that are central to the code review findings, in the format " - "'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize those that contain issues, " - "demonstrate patterns, show security concerns, or represent key architectural decisions." - ), - "issues_found": ( - "List of issues identified during the investigation. Each issue should be a dictionary with 'severity' " - "(critical, high, medium, low) and 'description' fields. Include security vulnerabilities, performance " - "bottlenecks, code quality issues, architectural concerns, maintainability problems, over-engineering, " - "unnecessary complexity, etc." - ), - "review_validation_type": ( - "Type of code review validation to perform: 'external' (default - uses external model for validation) or " - "'internal' (performs validation without external model review). IMPORTANT: Always default to 'external' unless " - "the user explicitly requests internal-only validation or asks you not to use another model. External validation " - "provides additional expert review and should be the standard approach for comprehensive code review." - ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." 
- ), - "images": ( - "Optional list of absolute paths to architecture diagrams, UI mockups, design documents, or visual references " - "that help with code review context. Only include if they materially assist understanding or assessment." - ), - "review_type": "Type of review to perform (full, security, performance, quick)", - "focus_on": "Specific aspects to focus on or additional context that would help understand areas of concern", - "standards": "Coding standards to enforce during the review", - "severity_filter": "Minimum severity level to report on the issues found", + "files_checked": "All examined files (absolute paths), including ruled-out ones.", + "relevant_files": "Step 1: All files/dirs for review. Final: Subset with key findings (issues, patterns, decisions).", + "relevant_context": "Methods/functions central to findings: 'Class.method' or 'function'. Focus on issues/patterns.", + "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. Vulnerabilities, performance, quality.", + "review_validation_type": "'external' (default, expert model) or 'internal' (no expert). Default external unless user specifies.", + "backtrack_from_step": "Step number to backtrack from if revision needed.", + "images": "Optional diagrams, mockups, visuals for review context (absolute paths). Include if materially helpful.", + "review_type": "Review type: full, security, performance, quick.", + "focus_on": "Specific aspects or context for areas of concern.", + "standards": "Coding standards to enforce.", + "severity_filter": "Minimum severity to report.", } diff --git a/tools/consensus.py b/tools/consensus.py index 1a81913..29fc50f 100644 --- a/tools/consensus.py +++ b/tools/consensus.py @@ -37,12 +37,9 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for consensus workflow CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "In step 1: Provide the EXACT question or proposal that ALL models will evaluate. This should be phrased as a clear " - "question or problem statement, NOT as 'I will analyze...' or 'Let me examine...'. For example: 'Should we build a " - "search component in SwiftUI for use in an AppKit app?' or 'Evaluate the proposal to migrate our database from MySQL " - "to PostgreSQL'. This exact text will be sent to all models for their independent evaluation. " - "In subsequent steps (2+): This field is for internal tracking only - you can provide notes about the model response " - "you just received. This will NOT be sent to other models (they all receive the original proposal from step 1)." + "The core question for consensus. Step 1: Provide the EXACT proposal for all models to evaluate. " + "CRITICAL: This text is sent to all models and must be a clear question, not a self-referential statement " + "(e.g., use 'Evaluate...' not 'I will evaluate...'). Steps 2+: Internal notes on the last model's response; this is NOT sent to other models." ), "step_number": ( "The index of the current step in the consensus workflow, beginning at 1. Step 1 is your analysis, " @@ -55,11 +52,9 @@ CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = { ), "next_step_required": ("Set to true if more models need to be consulted. False when ready for final synthesis."), "findings": ( - "In step 1: Provide YOUR OWN comprehensive analysis of the proposal/question. This is where you share your " - "independent evaluation, considering technical feasibility, risks, benefits, and alternatives. 
This analysis " - "is NOT sent to other models - it's recorded for the final synthesis. " - "In steps 2+: Summarize the key points from the model response received, noting agreements and disagreements " - "with previous analyses." + "Your analysis of the consensus topic. Step 1: Your independent, comprehensive analysis of the proposal. " + "CRITICAL: This is for the final synthesis and is NOT sent to the other models. " + "Steps 2+: A summary of the key points from the most recent model's response." ), "relevant_files": ( "Files that are relevant to the consensus analysis. Include files that help understand the proposal, " diff --git a/tools/debug.py b/tools/debug.py index 52995e8..7231e20 100644 --- a/tools/debug.py +++ b/tools/debug.py @@ -34,83 +34,39 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions matching original debug tool DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = { "step": ( - "Describe what you're currently investigating by thinking deeply about the issue and its possible causes. " - "In step 1, clearly state the issue and begin forming an investigative direction after thinking carefully" - "about the described problem. Ask further questions from the user if you think these will help with your" - "understanding and investigation. CRITICAL: Remember that reported symptoms might originate from code far from " - "where they manifest. Also be aware that after thorough investigation, you might find NO BUG EXISTS - it could " - "be a misunderstanding or expectation mismatch. Consider not only obvious failures, but also subtle " - "contributing factors like upstream logic, invalid inputs, missing preconditions, or hidden side effects. " - "Map out the flow of related functions or modules. Identify call paths where input values or branching logic " - "could cause instability. In concurrent systems, watch for race conditions, shared state, or timing " - "dependencies. In all later steps, continue exploring with precision: trace deeper dependencies, verify " - "hypotheses, and adapt your understanding as you uncover more evidence." - "IMPORTANT: When referring to code, use the relevant_files parameter to pass relevant files and only use the prompt to refer to " - "function / method names or very small code snippets if absolutely necessary to explain the issue. Do NOT " - "pass large code snippets in the prompt as this is exclusively reserved for descriptive text only. " - ), - "step_number": ( - "The index of the current step in the investigation sequence, beginning at 1. Each step should build upon or " - "revise the previous one." + "Investigation step. Step 1: State issue+direction. " + "Symptoms misleading; 'no bug' valid. Trace dependencies, verify hypotheses. " + "Use relevant_files for code; this for text only." ), + "step_number": "Current step index (starts at 1). Build upon previous steps.", "total_steps": ( - "Your current estimate for how many steps will be needed to complete the investigation. " - "Adjust as new findings emerge. IMPORTANT: When continuation_id is provided (continuing a previous " - "conversation), set this to 1 as we're not starting a new multi-step investigation." + "Estimated total steps needed to complete the investigation. Adjust as new findings emerge. " + "IMPORTANT: When continuation_id is provided (continuing a previous conversation), set this to 1 as we're not starting a new multi-step investigation." ), "next_step_required": ( - "Set to true if you plan to continue the investigation with another step. 
False means you believe the root " - "cause is known or the investigation is complete. IMPORTANT: When continuation_id is " - "provided (continuing a previous conversation), set this to False to immediately proceed with expert analysis." + "True if you plan to continue the investigation with another step. False means root cause is known or investigation is complete. " + "IMPORTANT: When continuation_id is provided (continuing a previous conversation), set this to False to immediately proceed with expert analysis." ), "findings": ( - "Summarize everything discovered in this step. Include new clues, unexpected behavior, evidence from code or " - "logs, or disproven theories. Be specific and avoid vague language—document what you now know and how it " - "affects your hypothesis. IMPORTANT: If you find no evidence supporting the reported issue after thorough " - "investigation, document this clearly. Finding 'no bug' is a valid outcome if the " - "investigation was comprehensive. " - "In later steps, confirm or disprove past findings with reason." - ), - "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during " - "the investigation so far. " - "Include even files ruled out, as this tracks your exploration path." - ), - "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain code directly relevant to the issue. Only list " - "those that are directly tied to the root cause or its effects. This could include the cause, trigger, or " - "place of manifestation." - ), - "relevant_context": ( - "List methods or functions that are central to the issue, in the format " - "'ClassName.methodName' or 'functionName'. " - "Prioritize those that influence or process inputs, drive branching, or pass state between modules." + "Discoveries: clues, code/log evidence, disproven theories. Be specific. " + "If no bug found, document clearly as valid." ), + "files_checked": "All examined files (absolute paths), including ruled-out ones.", + "relevant_files": "Files directly relevant to issue (absolute paths). Cause, trigger, or manifestation locations.", + "relevant_context": "Methods/functions central to issue: 'Class.method' or 'function'. Focus on inputs/branching/state.", "hypothesis": ( - "A concrete theory for what's causing the issue based on the evidence so far. This can include suspected " - "failures, incorrect assumptions, or violated constraints. VALID HYPOTHESES INCLUDE: 'No bug found - possible " - "user misunderstanding' or 'Symptoms appear unrelated to any code issue' if evidence supports this. When " - "no bug is found, consider suggesting: 'Recommend discussing with thought partner/engineering assistant for " - "clarification of expected behavior.' You are encouraged to revise or abandon hypotheses in later steps as " - "needed based on evidence." + "Concrete root cause theory from evidence. Can revise. " + "Valid: 'No bug found - user misunderstanding' or 'Symptoms unrelated to code' if supported." ), "confidence": ( - "Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), " - "'medium' (some supporting evidence), 'high' (strong evidence), 'very_high' (very strong evidence), " - "'almost_certain' (nearly confirmed), 'certain' (200% confidence - root cause and minimal fix are both " - "confirmed locally with no need for external model validation). 
Do NOT use 'certain' unless the issue can be " - "fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 200% sure. Using 'certain' " - "means you have ABSOLUTE confidence locally and prevents external model validation. Also do " - "NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed." - ), - "backtrack_from_step": ( - "If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." - ), - "images": ( - "Optional list of absolute paths to screenshots or UI visuals that clarify the issue. " - "Only include if they materially assist understanding or hypothesis formulation." + "Your confidence in the hypothesis: exploring (starting out), low (early idea), medium (some evidence), " + "high (strong evidence), very_high (very strong evidence), almost_certain (nearly confirmed), " + "certain (100% confidence - root cause and fix are both confirmed locally with no need for external validation). " + "WARNING: Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. " + "Using 'certain' means you have ABSOLUTE confidence locally and PREVENTS external model validation." ), + "backtrack_from_step": "Step number to backtrack from if revision needed.", + "images": "Optional screenshots/visuals clarifying issue (absolute paths).", } diff --git a/tools/docgen.py b/tools/docgen.py index 9f70759..b56e395 100644 --- a/tools/docgen.py +++ b/tools/docgen.py @@ -37,15 +37,9 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for documentation generation DOCGEN_FIELD_DESCRIPTIONS = { "step": ( - "For step 1: DISCOVERY PHASE ONLY - describe your plan to discover ALL files that need documentation in the current directory. " - "DO NOT document anything yet. Count all files, list them clearly, report the total count, then IMMEDIATELY proceed to step 2. " - "For step 2 and beyond: DOCUMENTATION PHASE - describe what you're currently documenting, focusing on ONE FILE at a time " - "to ensure complete coverage of all functions and methods within that file. CRITICAL: DO NOT ALTER ANY CODE LOGIC - " - "only add documentation (docstrings, comments). ALWAYS use MODERN documentation style for the programming language " - '(e.g., /// for Objective-C, /** */ for Java/JavaScript, """ for Python, // for Swift/C++, etc. - NEVER use legacy styles). ' - "Consider complexity analysis, call flow information, and parameter descriptions. " - "If you find bugs or logic issues, TRACK THEM but DO NOT FIX THEM - report after documentation is complete. " - "Report progress using num_files_documented out of total_files_to_document counters." + "Step 1 (DISCOVERY): Plan to discover ALL files needing documentation; count and list them clearly. DO NOT document yet. " + "Step 2+ (DOCUMENTATION): Document ONE file at a time. CRITICAL: DO NOT ALTER CODE LOGIC - only add documentation. " + "If you find bugs, TRACK them but DO NOT FIX. Report progress using counters." ), "step_number": ( "The index of the current step in the documentation generation sequence, beginning at 1. Each step should build upon or " @@ -60,40 +54,24 @@ DOCGEN_FIELD_DESCRIPTIONS = { "documentation plan is complete and ready for implementation." ), "findings": ( - "Summarize everything discovered in this step about the code and its documentation needs. 
Include analysis of missing " - "documentation, complexity assessments, call flow understanding, and opportunities for improvement. Be specific and " - "avoid vague language—document what you now know about the code structure and how it affects your documentation plan. " - "IMPORTANT: Document both well-documented areas (good examples to follow) and areas needing documentation. " - "ALWAYS use MODERN documentation style appropriate for the programming language (/// for Objective-C, /** */ for Java/JavaScript, " - '""" for Python, // for Swift/C++, etc. - NEVER use legacy /* */ style for languages that have modern alternatives). ' - "If you discover ANY BUGS OR LOGIC ERRORS (critical or non-critical), IMMEDIATELY STOP " - "the documentation workflow and ask the user directly if this bug should be addressed before continuing. " - "This includes: incorrect logic, wrong calculations, backwards conditions, inverted values, missing error handling, " - "security vulnerabilities, performance issues, or any code that doesn't match its intended function name/purpose. " - "NEVER document code with known bugs - always stop and report to user first. " - "In later steps, confirm or update past findings with additional evidence." + "Summary of documentation needs found in this step. Note missing docs, complexity, and call flows. " + "IMPORTANT: Document both well-documented areas and areas needing docs. " + "CRITICAL: If ANY bugs are found, STOP and report them immediately before continuing documentation." ), "relevant_files": ( - "Current focus files (as full absolute paths) for this step. In each step, focus on documenting " - "ONE FILE COMPLETELY before moving to the next. This should contain only the file(s) being " - "actively documented in the current step, not all files that might need documentation." + "Current focus files (absolute paths) for this step. Focus on documenting ONE FILE completely per step." ), "relevant_context": ( - "List methods, functions, or classes that need documentation, in the format " - "'ClassName.methodName' or 'functionName'. " - "Prioritize those with complex logic, important interfaces, or missing/inadequate documentation." + "List methods/functions needing documentation, in 'ClassName.methodName' or 'functionName' format. " + "Prioritize complex logic, important interfaces, or missing documentation." ), "num_files_documented": ( - "CRITICAL COUNTER: Number of files you have COMPLETELY documented so far. Start at 0. " - "Increment by 1 only when a file is 100% documented (all functions/methods have documentation). " - "This counter prevents premature completion - you CANNOT set next_step_required=false " - "unless num_files_documented equals total_files_to_document." + "Counter for fully documented files. Starts at 0. Increment only when a file is 100% complete. " + "CRITICAL: Must equal 'total_files_to_document' to finish." ), "total_files_to_document": ( - "CRITICAL COUNTER: Total number of files discovered that need documentation in current directory. " - "Set this in step 1 after discovering all files. This is the target number - when " - "num_files_documented reaches this number, then and ONLY then can you set next_step_required=false. " - "This prevents stopping after documenting just one file." + "Counter for total files needing documentation. Set in step 1 during discovery. " + "This is the completion target for the 'num_files_documented' counter." 
), "document_complexity": ( "Whether to include algorithmic complexity (Big O) analysis in function/method documentation. " diff --git a/tools/listmodels.py b/tools/listmodels.py index f23fc23..7fa0c90 100644 --- a/tools/listmodels.py +++ b/tools/listmodels.py @@ -34,9 +34,7 @@ class ListModelsTool(BaseTool): return "listmodels" def get_description(self) -> str: - return ( - "Shows which AI model providers are configured, available model names, their aliases and capabilities." - ) + return "Shows which AI model providers are configured, available model names, their aliases and capabilities." def get_input_schema(self) -> dict[str, Any]: """Return the JSON schema for the tool's input""" diff --git a/tools/planner.py b/tools/planner.py index 7983e4f..95225d2 100644 --- a/tools/planner.py +++ b/tools/planner.py @@ -39,11 +39,8 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions matching original planner tool PLANNER_FIELD_DESCRIPTIONS = { "step": ( - "Your current planning step. For the first step, describe the task/problem to plan and be extremely expressive " - "so that subsequent steps can break this down into simpler steps. " - "For subsequent steps, provide the actual planning step content. Can include: regular planning steps, " - "revisions of previous steps, questions about previous decisions, realizations about needing more analysis, " - "changes in approach, etc." + "Your current planning step content. Step 1: Describe the task/problem to plan in detail for breakdown. " + "Subsequent steps: Provide planning content (steps, revisions, questions, approach changes, etc.)." ), "step_number": "Current step number in the planning sequence (starts at 1)", "total_steps": "Current estimate of total steps needed (can be adjusted up/down as planning progresses)", diff --git a/tools/precommit.py b/tools/precommit.py index c053af5..29978f1 100644 --- a/tools/precommit.py +++ b/tools/precommit.py @@ -34,84 +34,37 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for precommit workflow PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Write your validation plan as a technical brief to another engineer. Use direct statements: 'I will examine git changes...' NOT 'Let me examine...'. " - "Step 1: State validation strategy. Later steps: Report findings with precision. " - "MANDATORY: Examine ALL git repos, staged/unstaged changes, understand modification scope/intent. " - "MANDATORY: Analyze security, performance, maintainability impacts. " - "MANDATORY: Use relevant_files parameter for code files. " - "FORBIDDEN: Large code snippets in this field - use only function/method names when needed." - ), - "step_number": ( - "The index of the current step in the pre-commit investigation sequence, beginning at 1. Each step should " - "build upon or revise the previous one." + "Validation plan. Step 1: State strategy. Later: Report findings. " + "MUST examine git changes, analyze impacts. Use 'relevant_files' for code. NO large snippets." ), + "step_number": "Current step index in pre-commit sequence (starts at 1). Build upon previous steps.", "total_steps": ( - "Your current estimate for how many steps will be needed to complete the pre-commit investigation. " - "IMPORTANT: When continuation_id is provided with external validation, " - "set this to no more than 3 (step 1: gather git changes, step 2: continue investigation, step 3: complete). For internal validation " - "continuations, set to 1 as we're not starting a new multi-step investigation." 
+ "Estimated steps needed to complete validation. " + "IMPORTANT: For external validation, use max 3 steps. For internal validation, use 1 step. " + "When continuation_id is provided (continuing a previous conversation), set to 3 max for external, 1 for internal." ), "next_step_required": ( - "Set to true if you plan to continue the investigation with another step. False means you believe the " - "pre-commit analysis is complete and ready for expert validation. CRITICAL: If total_steps >= 3, you MUST set " - "next_step_required=True for all steps before the final step. Only set to False when step_number equals total_steps. " - "For external continuations, set to False only on the final step to trigger expert analysis." + "True to continue with another step, False when validation is complete. " + "CRITICAL: If total_steps>=3, set to True until the final step. " + "When continuation_id is provided: Follow the same validation rules based on precommit_type." ), "findings": ( - "Summarize everything discovered in this step about the changes being committed. Include analysis of git diffs, " - "file modifications, new functionality, potential issues identified, code quality observations, and security " - "considerations. Be specific and avoid vague language—document what you now know about the changes and how " - "they affect your assessment. IMPORTANT: Document both positive findings (good patterns, proper implementations) " - "and concerns (potential bugs, missing tests, security risks). In later steps, confirm or update past findings " - "with additional evidence." + "Discoveries: git diffs, modifications, issues (bugs, missing tests, security). " + "Document positive+concerns. Update in later steps." ), - "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the pre-commit " - "investigation so far. Include even files ruled out or found to be unchanged, as this tracks your " - "exploration path." - ), - "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain changes or are directly relevant to the " - "commit validation. Only list those that are directly tied to the changes being committed, their dependencies, " - "or files that need validation. This could include modified files, related configuration, tests, or " - "documentation." - ), - "relevant_context": ( - "List methods, functions, classes, or modules that are central to the changes being committed, in the format " - "'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize those that are modified, added, " - "or significantly affected by the changes." - ), - "issues_found": ( - "List of issues identified during the investigation. Each issue should be a dictionary with 'severity' " - "(critical, high, medium, low) and 'description' fields. Include potential bugs, security concerns, " - "performance issues, missing tests, incomplete implementations, etc." - ), - "precommit_type": ( - "Type of pre-commit validation to perform: 'external' (default - uses external model for validation) or 'internal' " - "(performs validation without external model review). IMPORTANT: Always default to 'external' unless the " - "user explicitly requests internal-only validation or asks you not to use another model. External validation " - "provides additional expert review and should be the standard approach for comprehensive pre-commit validation." 
- ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." - ), - "images": ( - "Optional list of absolute paths to screenshots, UI mockups, or visual references that help validate the " - "changes. Only include if they materially assist understanding or assessment of the commit." - ), - "path": ( - "Starting absolute path to the directory to search for git repositories (must be FULL absolute paths - " - "DO NOT SHORTEN). REQUIRED for step 1." - ), - "compare_to": ( - "Optional: A git ref (branch, tag, commit hash) to compare against. Check remote branches if local does not exist." - "If not provided, investigates local staged and unstaged changes." - ), - "include_staged": "Analyzes staged changes for a local commit. This parameter is ignored if 'compare_to' is provided.", - "include_unstaged": "Analyzes unstaged (uncommitted) changes for a local commit. This parameter is ignored if 'compare_to' is provided.", - "focus_on": "Specific aspects to focus on (e.g., 'security implications', 'performance impact', 'test coverage').", - "severity_filter": "Minimum severity level to report on the changes.", + "files_checked": "All examined files (absolute paths), including ruled-out ones.", + "relevant_files": "Files with changes or relevant to validation (absolute paths). Modified files, config, tests, docs.", + "relevant_context": "Methods/functions central to changes: 'Class.method' or 'function'. Focus on modified/added.", + "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. Bugs, security, performance.", + "precommit_type": "'external' (default, expert review) or 'internal' (local only). Default external unless user specifies.", + "backtrack_from_step": "Step number to backtrack from if revision needed.", + "images": "Optional screenshots/visuals for validation (absolute paths).", + "path": "Starting path for git repos (FULL absolute path). REQUIRED step 1.", + "compare_to": "Optional git ref (branch/tag/commit) to compare. Checks remotes if needed. Without: checks staged/unstaged.", + "include_staged": "Analyze staged changes. Ignored if 'compare_to' provided.", + "include_unstaged": "Analyze unstaged changes. Ignored if 'compare_to' provided.", + "focus_on": "Focus aspects: security, performance, test coverage.", + "severity_filter": "Minimum severity to report.", } diff --git a/tools/refactor.py b/tools/refactor.py index 297436e..dda8899 100644 --- a/tools/refactor.py +++ b/tools/refactor.py @@ -35,18 +35,9 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for refactor tool REFACTOR_FIELD_DESCRIPTIONS = { "step": ( - "Describe what you're currently investigating for refactoring by thinking deeply about the code structure, " - "patterns, and potential improvements. In step 1, clearly state your refactoring investigation plan and begin " - "forming a systematic approach after thinking carefully about what needs to be analyzed. CRITICAL: Remember to " - "thoroughly examine code quality, performance implications, maintainability concerns, and architectural patterns. " - "Consider not only obvious code smells and issues but also opportunities for decomposition, modernization, " - "organization improvements, and ways to reduce complexity while maintaining functionality. 
Map out the codebase " - "structure, understand the business logic, and identify areas requiring refactoring. In all later steps, continue " - "exploring with precision: trace dependencies, verify assumptions, and adapt your understanding as you uncover " - "more refactoring opportunities." - "IMPORTANT: When referring to code, use the relevant_files parameter to pass relevant files and only use the prompt to refer to " - "function / method names or very small code snippets if absolutely necessary to explain the issue. Do NOT " - "pass large code snippets in the prompt as this is exclusively reserved for descriptive text only. " + "The refactoring plan. Step 1: State strategy. Later steps: Report findings. " + "CRITICAL: Examine code for smells, and opportunities for decomposition, modernization, and organization. " + "Use 'relevant_files' for code. FORBIDDEN: Large code snippets." ), "step_number": ( "The index of the current step in the refactoring investigation sequence, beginning at 1. Each step should " @@ -61,51 +52,33 @@ REFACTOR_FIELD_DESCRIPTIONS = { "refactoring analysis is complete and ready for expert validation." ), "findings": ( - "Summarize everything discovered in this step about refactoring opportunities in the code. Include analysis of " - "code smells, decomposition opportunities, modernization possibilities, organization improvements, architectural " - "patterns, design decisions, potential performance optimizations, and maintainability enhancements. Be specific " - "and avoid vague language—document what you now know about the code and how it could be improved. IMPORTANT: " - "Document both positive aspects (good patterns, well-designed components) and improvement opportunities " - "(code smells, overly complex functions, outdated patterns, organization issues). In later steps, confirm or " - "update past findings with additional evidence." + "Summary of discoveries from this step, including code smells and opportunities for decomposition, modernization, or organization. " + "Document both strengths and weaknesses. In later steps, confirm or update past findings." ), "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the refactoring " - "investigation so far. Include even files ruled out or found to need no refactoring, as this tracks your " - "exploration path." + "List all files examined (absolute paths). Include even ruled-out files to track exploration path." ), "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain code requiring refactoring or are directly " - "relevant to the refactoring opportunities identified. Only list those that are directly tied to specific " - "refactoring opportunities, code smells, decomposition needs, or improvement areas. This could include files " - "with code smells, overly large functions/classes, outdated patterns, or organization issues." + "Subset of files_checked with code requiring refactoring (absolute paths). Include files with " + "code smells, decomposition needs, or improvement opportunities." ), "relevant_context": ( - "List methods, functions, classes, or modules that are central to the refactoring opportunities identified, " - "in the format 'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize those that contain " - "code smells, need decomposition, could benefit from modernization, or require organization improvements." 
+ "List methods/functions central to refactoring opportunities, in 'ClassName.methodName' or 'functionName' format. " + "Prioritize those with code smells or needing improvement." ), "issues_found": ( - "List of refactoring opportunities identified during the investigation. Each opportunity should be a dictionary " - "with 'severity' (critical, high, medium, low), 'type' (codesmells, decompose, modernize, organization), and " - "'description' fields. Include code smells, decomposition opportunities, modernization possibilities, " - "organization improvements, performance optimizations, maintainability enhancements, etc." + "Refactoring opportunities as dictionaries with 'severity' (critical/high/medium/low), " + "'type' (codesmells/decompose/modernize/organization), and 'description'. " + "Include all improvement opportunities found." ), "confidence": ( - "Indicate your current confidence in the refactoring analysis completeness. Use: 'exploring' (starting " - "analysis), 'incomplete' (just started or significant work remaining), 'partial' (some refactoring " - "opportunities identified but more analysis needed), 'complete' (comprehensive refactoring analysis " - "finished with all major opportunities identified and the CLI agent can handle 100% confidently without help). " - "Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring " - "opportunities, and can provide comprehensive recommendations without expert assistance. When files are " - "too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert " - "analysis to save time and money. Do NOT set confidence to 'certain' if the user has strongly requested that " - "external validation MUST be performed." - ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." + "Your confidence in refactoring analysis: exploring (starting), incomplete (significant work remaining), " + "partial (some opportunities found, more analysis needed), complete (comprehensive analysis finished, " + "all major opportunities identified). " + "WARNING: Use 'complete' ONLY when fully analyzed and can provide recommendations without expert help. " + "'complete' PREVENTS expert validation. Use 'partial' for large files or uncertain analysis." ), + "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."), "images": ( "Optional list of absolute paths to architecture diagrams, UI mockups, design documents, or visual references " "that help with refactoring context. Only include if they materially assist understanding or assessment." diff --git a/tools/secaudit.py b/tools/secaudit.py index cafe08d..38d5244 100644 --- a/tools/secaudit.py +++ b/tools/secaudit.py @@ -36,85 +36,40 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for security audit workflow SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Describe what you're currently investigating for security audit by thinking deeply about security " - "implications, threat vectors, and protection mechanisms. In step 1, clearly state your security " - "audit plan and begin forming a systematic approach after identifying the application type, " - "technology stack, and relevant security requirements. 
You must begin by passing the file path " - "for the initial code you are about to audit in relevant_files. CRITICAL: Follow the OWASP Top 10 " - "systematic checklist, examine authentication/authorization mechanisms, analyze input validation " - "and data handling, assess dependency vulnerabilities, and evaluate infrastructure security. " - "Consider not only obvious vulnerabilities but also subtle security gaps, configuration issues, " - "design flaws, and compliance requirements. Map out the attack surface, understand the threat " - "landscape, and identify areas requiring deeper security analysis. In all later steps, continue " - "exploring with precision: trace security dependencies, verify security assumptions, and adapt " - "your understanding as you uncover security evidence." - ), - "step_number": ( - "The index of the current step in the security audit sequence, beginning at 1. Each step should " - "build upon or revise the previous one." - ), - "total_steps": ( - "Your current estimate for how many steps will be needed to complete the security audit. " - "Adjust and increase as new security findings emerge." - ), - "next_step_required": ( - "Set to true if you plan to continue the investigation with another step. False means you believe " - "the security audit analysis is complete and ALL threats have been uncovered, ready for expert validation." + "Audit plan. Step 1: State strategy. Later: Report findings. " + "MANDATORY: Systematic approach (OWASP Top 10, auth, validation). Use 'relevant_files'. NO large code." ), + "step_number": "Current step in audit sequence (starts at 1).", + "total_steps": "Estimated steps for audit. Adjust as findings emerge.", + "next_step_required": ("True to continue. False when ALL threats uncovered, ready for validation."), "findings": ( - "Summarize everything discovered in this step about security aspects of the code being audited. " - "Include analysis of security vulnerabilities, authentication/authorization issues, input validation " - "gaps, encryption weaknesses, configuration problems, and compliance concerns. Be specific and avoid " - "vague language—document what you now know about the security posture and how it affects your " - "assessment. IMPORTANT: Document both positive security findings (proper implementations, good " - "security practices) and concerns (vulnerabilities, security gaps, compliance issues). In later " - "steps, confirm or update past findings with additional evidence." - ), - "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the security " - "audit investigation so far. Include even files ruled out or found to be unrelated, as this tracks " - "your exploration path." + "Discoveries: vulnerabilities, auth issues, validation gaps, compliance. " + "Document positives and concerns. Update past findings." ), + "files_checked": "All files examined (absolute paths). Include ruled-out files.", "relevant_files": ( - "For when this is the first step, please pass absolute file paths of relevant code to audit (do not clip " - "file paths). When used for the final step, this contains a subset of files_checked (as full absolute paths) " - "that contain code directly relevant to the security audit or contain significant security issues, patterns, " - "or examples worth highlighting. Only list those that are directly tied to important security findings, " - "vulnerabilities, authentication issues, or security architectural decisions. 
This could include " - "authentication modules, input validation files, configuration files, or files with notable security patterns." + "Step 1: Files to audit (absolute paths). " "Final: Files with security issues, auth modules, config files." ), "relevant_context": ( - "List methods, functions, classes, or modules that are central to the security audit findings, in the " - "format 'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize those that contain " - "security vulnerabilities, demonstrate security patterns, show authentication/authorization logic, or " - "represent key security architectural decisions." + "Security-critical methods/classes: 'ClassName.methodName'. " + "Focus on vulnerabilities, auth logic, security patterns." ), "issues_found": ( - "List of security issues identified during the investigation. Each issue should be a dictionary with " - "'severity' (critical, high, medium, low) and 'description' fields. Include security vulnerabilities, " - "authentication bypasses, authorization flaws, injection vulnerabilities, cryptographic weaknesses, " - "configuration issues, compliance gaps, etc." + "Security issues as dict: 'severity' (critical/high/medium/low), 'description'. " + "Include vulnerabilities, auth flaws, injection, crypto weakness, config issues." ), "confidence": ( - "Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), " - "'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), " - "'very_high' (very strong evidence), 'almost_certain' (nearly complete audit), 'certain' " - "(100% confidence - security audit is thoroughly complete and all significant security issues are identified with no need for external model validation). " - "Do NOT use 'certain' unless the security audit is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. " - "Using 'certain' means you have complete confidence locally and prevents external model validation." - ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which " - "to start over. Use this to acknowledge investigative dead ends and correct the course." + "exploring/low/medium/high/very_high/almost_certain/certain. " + "CRITICAL: 'certain' PREVENTS external validation." ), + "backtrack_from_step": "Step number to backtrack from if revision needed.", "images": ( - "Optional list of absolute paths to architecture diagrams, security models, threat models, or visual " - "references that help with security audit context. Only include if they materially assist understanding " - "or assessment of security posture." + "Optional: Architecture diagrams, security models, threat models (absolute paths). " + "Only if assists security assessment." ), "security_scope": ( - "Define the security scope and application context (web app, mobile app, API, enterprise system, " - "cloud service). Include technology stack, user types, data sensitivity, and threat landscape. " + "Security context (web/mobile/API/enterprise/cloud). " + "Include stack, user types, data sensitivity, threat landscape. " "This helps focus the security assessment appropriately." 
), "threat_level": ( diff --git a/tools/shared/base_models.py b/tools/shared/base_models.py index 946bdb7..2510f71 100644 --- a/tools/shared/base_models.py +++ b/tools/shared/base_models.py @@ -21,57 +21,54 @@ logger = logging.getLogger(__name__) # Shared field descriptions to avoid duplication COMMON_FIELD_DESCRIPTIONS = { "model": ( - "Model to use. See tool's input schema for available models and their capabilities. " + "Model to use. See tool's input schema for available models. " "Use 'auto' to let Claude select the best model for the task." ), "temperature": ( - "Temperature for response (0.0 to 1.0). Lower values are more focused and deterministic, " - "higher values are more creative. Tool-specific defaults apply if not specified." + "Lower values: focused/deterministic; higher: creative. Tool-specific defaults apply if unspecified." ), "thinking_mode": ( - "Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), high (67%), " - "max (100% of model max). Higher modes enable deeper reasoning at the cost of speed." + "Thinking depth: minimal (0.5%), low (8%), medium (33%), high (67%), " + "max (100% of model max). Higher modes: deeper reasoning but slower." ), "use_websearch": ( - "Enable web search for documentation and current information. Model can request Claude to perform " - "searches during conversation. Useful for: architecture discussions, best practices, framework docs, " - "solution research, or when current information would enhance analysis." + "Enable web search for docs and current info. Model can request Claude to perform web-search for " + "best practices, framework docs, solution research, latest API information." ), "continuation_id": ( - "Thread continuation ID for multi-turn conversations. Automatically reuse the last continuation_id " - "when this appears to be a follow-up or related discussion (unless user explicitly provides a different ID). " - "When provided, the tool embeds complete conversation history as context. Your response should build upon this history " - "without repeating previous analysis. Focus on providing only new insights. Works across different tools." + "Unique thread continuation ID for multi-turn conversations. Reuse last continuation_id " + "when continuing discussion (unless user provides different ID) using exact unique identifer. " + "Embeds complete conversation history. Build upon history without repeating. " + "Focus on new insights. Works across different tools." ), "images": ( - "Optional images for visual context. MUST be absolute file paths or base64 data. " - "Only use when user mentions images. Describe what each image contains. " - "Useful for: UI, diagrams, error screens, mockups, visual analysis." + "Optional images for visual context. MUST be absolute paths or base64. " + "Use when user mentions images. Describe image contents. " ), - "files": ("Optional files for context (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)"), + "files": ("Optional files for context (FULL absolute paths to real files/folders - DO NOT SHORTEN)"), } # Workflow-specific field descriptions WORKFLOW_FIELD_DESCRIPTIONS = { "step": "Current work step content and findings from your overall work", - "step_number": "Current step number in the work sequence (starts at 1)", - "total_steps": "Estimated total steps needed to complete the work", - "next_step_required": "Whether another work step is needed after this one. 
When false, aim to reduce total_steps to match step_number to avoid mismatch.", - "findings": "Important findings, evidence and insights discovered in this step of the work", + "step_number": "Current step number in work sequence (starts at 1)", + "total_steps": "Estimated total steps needed to complete work", + "next_step_required": "Whether another work step is needed. When false, aim to reduce total_steps to match step_number to avoid mismatch.", + "findings": "Important findings, evidence and insights discovered in this step", "files_checked": "List of files examined during this work step", - "relevant_files": "Files identified as relevant to the issue/goal (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", + "relevant_files": "Files identified as relevant to issue/goal (FULL absolute paths to real files/folders - DO NOT SHORTEN)", "relevant_context": "Methods/functions identified as involved in the issue", "issues_found": "Issues identified with severity levels during work", "confidence": ( - "Confidence level in findings: exploring (just starting), low (early investigation), " + "Confidence level: exploring (just starting), low (early investigation), " "medium (some evidence), high (strong evidence), very_high (comprehensive understanding), " "almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)" ), - "hypothesis": "Current theory about the issue/goal based on work", + "hypothesis": "Current theory about issue/goal based on work", "backtrack_from_step": "Step number to backtrack from if work needs revision", "use_assistant_model": ( - "Whether to use assistant model for expert analysis after completing the workflow steps. " - "Set to False to skip expert analysis and rely solely on Claude's investigation. " + "Use assistant model for expert analysis after workflow steps. " + "False skips expert analysis, relies solely on Claude's investigation. " "Defaults to True for comprehensive validation." 
), } @@ -171,16 +168,16 @@ class ConsolidatedFindings(BaseModel): files_checked: set[str] = Field(default_factory=set, description="All files examined across all steps") relevant_files: set[str] = Field( default_factory=set, - description="A subset of files_checked that have been identified as relevant for the work at hand", + description="Subset of files_checked identified as relevant for work at hand", ) relevant_context: set[str] = Field( - default_factory=set, description="All methods/functions identified during overall work being performed" + default_factory=set, description="All methods/functions identified during overall work" ) - findings: list[str] = Field(default_factory=list, description="Chronological list of findings from each work step") - hypotheses: list[dict] = Field(default_factory=list, description="Evolution of hypotheses across work steps") - issues_found: list[dict] = Field(default_factory=list, description="All issues found with severity levels") - images: list[str] = Field(default_factory=list, description="Images collected during overall work") - confidence: str = Field("low", description="Latest confidence level from work steps") + findings: list[str] = Field(default_factory=list, description="Chronological findings from each work step") + hypotheses: list[dict] = Field(default_factory=list, description="Evolution of hypotheses across steps") + issues_found: list[dict] = Field(default_factory=list, description="All issues with severity levels") + images: list[str] = Field(default_factory=list, description="Images collected during work") + confidence: str = Field("low", description="Latest confidence level from steps") # Tool-specific field descriptions are now declared in each tool file diff --git a/tools/testgen.py b/tools/testgen.py index 1601ec0..8584249 100644 --- a/tools/testgen.py +++ b/tools/testgen.py @@ -35,11 +35,8 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for test generation workflow TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "What to analyze or look for in this step. In step 1, describe what you want to test and begin forming an " - "analytical approach after thinking carefully about what needs to be examined. Consider code structure, " - "business logic, critical paths, edge cases, and potential failure modes. Map out the codebase structure, " - "understand the functionality, and identify areas requiring test coverage. In later steps, continue exploring " - "with precision and adapt your understanding as you uncover more insights about testable behaviors." + "The test plan for this step. Step 1: State strategy for analyzing code structure, business logic, critical paths, and edge cases. " + "Later steps: Report findings and adapt as new test scenarios are identified." ), "step_number": ( "The index of the current step in the test generation sequence, beginning at 1. Each step should build upon or " @@ -54,27 +51,20 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = { "test generation analysis is complete and ready for expert validation." ), "findings": ( - "Summarize everything discovered in this step about the code being tested. Include analysis of functionality, " - "critical paths, edge cases, boundary conditions, error handling, async behavior, state management, and " - "integration points. Be specific and avoid vague language—document what you now know about the code and " - "what test scenarios are needed. IMPORTANT: Document both the happy paths and potential failure modes. 
" - "Identify existing test patterns if examples were provided. In later steps, confirm or update past findings " - "with additional evidence." + "Summary of discoveries about the code being tested. Include analysis of functionality, critical paths, edge cases, " + "boundary conditions, and error handling. IMPORTANT: Document both happy paths and failure modes. " + "Identify existing test patterns. In later steps, confirm or update past findings." ), "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the test generation " - "investigation so far. Include even files ruled out or found to be unrelated, as this tracks your " - "exploration path." + "List all files examined (absolute paths). Include even ruled-out files to track exploration path." ), "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain code directly needing tests or are essential " - "for understanding test requirements. Only list those that are directly tied to the functionality being tested. " - "This could include implementation files, interfaces, dependencies, or existing test examples." + "Subset of files_checked containing code needing tests (absolute paths). Include implementation files, " + "interfaces, dependencies, or existing test examples." ), "relevant_context": ( - "List methods, functions, classes, or modules that need test coverage, in the format " - "'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize critical business logic, " - "public APIs, complex algorithms, and error-prone code paths." + "List methods/functions needing test coverage, in 'ClassName.methodName' or 'functionName' format. " + "Prioritize critical business logic, public APIs, and error-prone code paths." ), "confidence": ( "Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), " @@ -84,10 +74,7 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = { "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. " "Using 'certain' means you have complete confidence locally and prevents external model validation." ), - "backtrack_from_step": ( - "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " - "start over. Use this to acknowledge investigative dead ends and correct the course." - ), + "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."), "images": ( "Optional list of absolute paths to architecture diagrams, flow charts, or visual documentation that help " "understand the code structure and test requirements. Only include if they materially assist test planning." 
diff --git a/tools/thinkdeep.py b/tools/thinkdeep.py index d46edf8..5c0b7f5 100644 --- a/tools/thinkdeep.py +++ b/tools/thinkdeep.py @@ -34,68 +34,47 @@ class ThinkDeepWorkflowRequest(WorkflowRequest): """Request model for thinkdeep workflow tool with comprehensive investigation capabilities""" # Core workflow parameters - step: str = Field(description="Current work step content and findings from your overall work") - step_number: int = Field(description="Current step number in the work sequence (starts at 1)", ge=1) - total_steps: int = Field(description="Estimated total steps needed to complete the work", ge=1) - next_step_required: bool = Field(description="Whether another work step is needed after this one") + step: str = Field(description="Current work step content and findings") + step_number: int = Field(description="Current step number (starts at 1)", ge=1) + total_steps: int = Field(description="Estimated total steps needed", ge=1) + next_step_required: bool = Field(description="Whether another step is needed") findings: str = Field( - description="Summarize everything discovered in this step about the problem/goal. Include new insights, " - "connections made, implications considered, alternative approaches, potential issues identified, " - "and evidence from thinking. Be specific and avoid vague language—document what you now know " - "and how it affects your hypothesis or understanding. IMPORTANT: If you find compelling evidence " - "that contradicts earlier assumptions, document this clearly. In later steps, confirm or update " - "past findings with additional reasoning." + description="Discoveries: insights, connections, implications, evidence. " + "Document contradictions to earlier assumptions. Update past findings." ) # Investigation tracking files_checked: list[str] = Field( default_factory=list, - description="List all files (as absolute paths) examined during the investigation so far. " - "Include even files ruled out or found unrelated, as this tracks your exploration path.", + description="All files examined (absolute paths). Include ruled-out files.", ) relevant_files: list[str] = Field( default_factory=list, - description="Subset of files_checked (as full absolute paths) that contain information directly " - "relevant to the problem or goal. Only list those directly tied to the root cause, " - "solution, or key insights. This could include the source of the issue, documentation " - "that explains the expected behavior, configuration files that affect the outcome, or " - "examples that illustrate the concept being analyzed.", + description="Files relevant to problem/goal (absolute paths). Include root cause, solution, key insights.", ) relevant_context: list[str] = Field( default_factory=list, - description="Key concepts, methods, or principles that are central to the thinking analysis, " - "in the format 'concept_name' or 'ClassName.methodName'. Focus on those that drive " - "the core insights, represent critical decision points, or define the scope of the analysis.", + description="Key concepts/methods: 'concept_name' or 'ClassName.methodName'. Focus on core insights, decision points.", ) hypothesis: Optional[str] = Field( default=None, - description="Current theory or understanding about the problem/goal based on evidence gathered. " - "This should be a concrete theory that can be validated or refined through further analysis. 
" - "You are encouraged to revise or abandon hypotheses in later steps based on new evidence.", + description="Current theory based on evidence. Revise in later steps.", ) # Analysis metadata issues_found: list[dict] = Field( default_factory=list, - description="Issues identified during work with severity levels - each as a dict with " - "'severity' (critical, high, medium, low) and 'description' fields.", + description="Issues with dict: 'severity' (critical/high/medium/low), 'description'.", ) confidence: str = Field( default="low", - description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), " - "'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), " - "'very_high' (very strong understanding), 'almost_certain' (nearly complete analysis), " - "'certain' (100% confidence - analysis is complete and conclusions are definitive with no need for external model validation). " - "Do NOT use 'certain' unless the thinking is comprehensively complete, use 'very_high' or 'almost_certain' instead when in doubt. " - "Using 'certain' means you have complete confidence locally and prevents external model validation.", + description="exploring/low/medium/high/very_high/almost_certain/certain. CRITICAL: 'certain' PREVENTS external validation.", ) # Advanced workflow features backtrack_from_step: Optional[int] = Field( default=None, - description="If an earlier finding or hypothesis needs to be revised or discarded, " - "specify the step number from which to start over. Use this to acknowledge analytical " - "dead ends and correct the course.", + description="Step number to backtrack from if revision needed.", ge=1, ) @@ -103,30 +82,27 @@ class ThinkDeepWorkflowRequest(WorkflowRequest): # in expert analysis (commented out exclude=True) temperature: Optional[float] = Field( default=None, - description="Temperature for creative thinking (0-1, default 0.7)", + description="Creative thinking temp (0-1, default 0.7)", ge=0.0, le=1.0, - # exclude=True # Excluded from MCP schema but available for internal use ) thinking_mode: Optional[str] = Field( default=None, - description="Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), high (67%), max (100% of model max). Defaults to 'high' if not specified.", - # exclude=True # Excluded from MCP schema but available for internal use + description="Depth: minimal/low/medium/high/max. Default 'high'.", ) use_websearch: Optional[bool] = Field( default=None, - description="Enable web search for documentation, best practices, and current information. Particularly useful for: brainstorming sessions, architectural design discussions, exploring industry best practices, working with specific frameworks/technologies, researching solutions to complex problems, or when current documentation and community insights would enhance the analysis.", - # exclude=True # Excluded from MCP schema but available for internal use + description="Enable web search for docs, brainstorming, architecture, solutions.", ) # Context files and investigation scope problem_context: Optional[str] = Field( default=None, - description="Provide additional context about the problem or goal. Be as expressive as possible. More information will be very helpful for the analysis.", + description="Additional context about problem/goal. 
Be expressive.", ) focus_areas: Optional[list[str]] = Field( default=None, - description="Specific aspects to focus on (architecture, performance, security, etc.)", + description="Focus aspects (architecture, performance, security, etc.)", ) @@ -177,12 +153,12 @@ class ThinkDeepTool(WorkflowTool): thinkdeep_field_overrides = { "problem_context": { "type": "string", - "description": "Provide additional context about the problem or goal. Be as expressive as possible. More information will be very helpful for the analysis.", + "description": "Additional context about problem/goal. Be expressive.", }, "focus_areas": { "type": "array", "items": {"type": "string"}, - "description": "Specific aspects to focus on (architecture, performance, security, etc.)", + "description": "Focus aspects (architecture, performance, security, etc.)", }, } diff --git a/tools/tracer.py b/tools/tracer.py index c37073d..e16af9b 100644 --- a/tools/tracer.py +++ b/tools/tracer.py @@ -38,14 +38,9 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for tracer workflow TRACER_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Describe what you're currently investigating for code tracing by thinking deeply about the code structure, " - "execution paths, and dependencies. In step 1, if trace_mode is 'ask', MUST prompt user to choose between " - "precision or dependencies mode with clear explanations. Otherwise, clearly state your tracing plan and begin " - "forming a systematic approach after thinking carefully about what needs to be analyzed. CRITICAL: For precision " - "mode, focus on execution flow, call chains, and usage patterns. For dependencies mode, focus on structural " - "relationships and bidirectional dependencies. Map out the code structure, understand the business logic, and " - "identify areas requiring deeper tracing. In all later steps, continue exploring with precision: trace dependencies, " - "verify call paths, and adapt your understanding as you uncover more evidence." + "The plan for the current tracing step. Step 1: State the tracing strategy. Later steps: Report findings and adapt the plan. " + "CRITICAL: For 'precision' mode, focus on execution flow and call chains. For 'dependencies' mode, focus on structural relationships. " + "If trace_mode is 'ask' in step 1, you MUST prompt the user to choose a mode." ), "step_number": ( "The index of the current step in the tracing sequence, beginning at 1. Each step should build upon or " @@ -60,46 +55,29 @@ TRACER_WORKFLOW_FIELD_DESCRIPTIONS = { "tracing analysis is complete and ready for final output formatting." ), "findings": ( - "Summarize everything discovered in this step about the code being traced. Include analysis of execution " - "paths, dependency relationships, call chains, structural patterns, and any discoveries about how the code " - "works. Be specific and avoid vague language—document what you now know about the code and how it affects " - "your tracing analysis. IMPORTANT: Document both the direct relationships (immediate calls, dependencies) " - "and indirect relationships (transitive dependencies, side effects). In later steps, confirm or update past " - "findings with additional evidence." + "Summary of discoveries from this step, including execution paths, dependency relationships, call chains, and structural patterns. " + "IMPORTANT: Document both direct (immediate calls) and indirect (transitive, side effects) relationships." 
), "files_checked": ( - "List all files (as absolute paths, do not clip or shrink file names) examined during the tracing " - "investigation so far. Include even files ruled out or found to be unrelated, as this tracks your " - "exploration path." + "List all files examined (absolute paths). Include even ruled-out files to track exploration path." ), "relevant_files": ( - "Subset of files_checked (as full absolute paths) that contain code directly relevant to the tracing analysis. " - "Only list those that are directly tied to the target method/function/class/module being traced, its " - "dependencies, or its usage patterns. This could include implementation files, related modules, or files " - "demonstrating key relationships." + "Subset of files_checked directly relevant to the tracing target (absolute paths). Include implementation files, " + "dependencies, or files demonstrating key relationships." ), "relevant_context": ( - "List methods, functions, classes, or modules that are central to the tracing analysis, in the format " - "'ClassName.methodName', 'functionName', or 'module.ClassName'. Prioritize those that are part of the " - "execution flow, dependency chain, or represent key relationships in the tracing analysis." + "List methods/functions central to the tracing analysis, in 'ClassName.methodName' or 'functionName' format. " + "Prioritize those in the execution flow or dependency chain." ), "confidence": ( - "Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), " - "'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), " - "'very_high' (very comprehensive understanding), 'almost_certain' (nearly complete tracing), " - "'certain' (100% confidence - tracing analysis is finished and ready for output with no need for external model validation). " - "Do NOT use 'certain' unless the tracing analysis is thoroughly finished and you have a comprehensive understanding " - "of the code relationships. Using 'certain' means you have complete confidence locally and prevents external model validation." + "Your confidence in the tracing analysis. Use: 'exploring', 'low', 'medium', 'high', 'very_high', 'almost_certain', 'certain'. " + "CRITICAL: 'certain' implies the analysis is 100% complete locally and PREVENTS external model validation." ), "trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)", "target_description": ( - "Detailed description of what to trace and WHY you need this analysis. MUST include context about what " - "you're trying to understand, debug, analyze or find." - ), - "images": ( - "Optional images of system architecture diagrams, flow charts, or visual references to help " - "understand the tracing context" + "Description of what to trace and WHY. Include context about what you're trying to understand or analyze." ), + "images": ("Optional paths to architecture diagrams or flow charts that help understand the tracing context."), } diff --git a/tools/version.py b/tools/version.py index fc8745f..ce81987 100644 --- a/tools/version.py +++ b/tools/version.py @@ -140,9 +140,7 @@ class VersionTool(BaseTool): return "version" def get_description(self) -> str: - return ( - "Get server version, configuration details, and list of available tools." - ) + return "Get server version, configuration details, and list of available tools." 
def get_input_schema(self) -> dict[str, Any]: """Return the JSON schema for the tool's input"""
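For context, a minimal sketch of populating the ConsolidatedFindings model whose field descriptions were trimmed in tools/shared/base_models.py above; the import path assumes the package layout matches that file path, and all values are hypothetical.

# Minimal sketch (hypothetical values) of aggregating per-step workflow data
# into ConsolidatedFindings, using the field set shown in the hunk above.
from tools.shared.base_models import ConsolidatedFindings

consolidated = ConsolidatedFindings(
    files_checked={"/abs/path/src/auth.py", "/abs/path/src/session.py"},
    relevant_files={"/abs/path/src/auth.py"},
    relevant_context={"AuthService.login"},
    findings=["Step 1: token expiry is not enforced on refresh."],
    hypotheses=[{"step": 1, "hypothesis": "Refresh path skips expiry check"}],
    issues_found=[{"severity": "high", "description": "Refresh tokens never expire"}],
    images=[],
    confidence="medium",
)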