From a509730dca6d41b3b71117532567a6cf05131a6e Mon Sep 17 00:00:00 2001 From: Fahad Date: Tue, 17 Jun 2025 20:49:53 +0400 Subject: [PATCH] New Planner tool to help you break down complex ideas, problems, and projects into multiple manageable steps. This is a self-prompt generation tool whose output can then be fed into another tool and model as required --- .gitignore | 2 + README.md | 94 ++-- config.py | 2 +- docs/tools/planner.md | 83 ++++ server.py | 7 + simulator_tests/__init__.py | 6 + .../test_planner_continuation_history.py | 361 ++++++++++++++ simulator_tests/test_planner_validation.py | 436 +++++++++++++++++ systemprompts/__init__.py | 2 + systemprompts/planner_prompt.py | 124 +++++ tests/test_planner.py | 413 ++++++++++++++++ tests/test_server.py | 5 +- tools/__init__.py | 2 + tools/planner.py | 440 ++++++++++++++++++ 14 files changed, 1940 insertions(+), 37 deletions(-) create mode 100644 docs/tools/planner.md create mode 100644 simulator_tests/test_planner_continuation_history.py create mode 100644 simulator_tests/test_planner_validation.py create mode 100644 systemprompts/planner_prompt.py create mode 100644 tests/test_planner.py create mode 100644 tools/planner.py diff --git a/.gitignore b/.gitignore index bcccb6b..f8462b0 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,5 @@ test-setup/ # Scratch feature documentation files FEATURE_*.md +# Temporary files +/tmp/ diff --git a/README.md b/README.md index 7ef8710..718c8e5 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,20 @@ problem-solving, and collaborative development. **Features true AI orchestration with conversations that continue across tasks** - Give Claude a complex task and let it orchestrate between models automatically. Claude stays in control, performs the actual work, -but gets perspectives from the best AI for each subtask. 
With tools like [`analyze`](#7-analyze---smart-file-analysis) for -understanding codebases, [`codereview`](#4-codereview---professional-code-review) for audits, [`refactor`](#8-refactor---intelligent-code-refactoring) for -improving code structure, [`debug`](#6-debug---expert-debugging-assistant) for solving complex problems, and [`precommit`](#5-precommit---pre-commit-validation) for +but gets perspectives from the best AI for each subtask. With tools like [`planner`](#3-planner---interactive-sequential-planning) for +breaking down complex projects, [`analyze`](#8-analyze---smart-file-analysis) for understanding codebases, +[`codereview`](#5-codereview---professional-code-review) for audits, [`refactor`](#9-refactor---intelligent-code-refactoring) for +improving code structure, [`debug`](#7-debug---expert-debugging-assistant) for solving complex problems, and [`precommit`](#6-precommit---pre-commit-validation) for validating changes, Claude can switch between different tools _and_ models mid-conversation, with context carrying forward seamlessly. **Example Workflow - Claude Code:** 1. Performs its own reasoning -2. Uses Gemini Pro to deeply [`analyze`](#7-analyze---smart-file-analysis) the code in question for a second opinion +2. Uses Gemini Pro to deeply [`analyze`](#8-analyze---smart-file-analysis) the code in question for a second opinion 3. Switches to O3 to continue [`chatting`](#1-chat---general-development-chat--collaborative-thinking) about its findings 4. Uses Flash to evaluate formatting suggestions from O3 5. Performs the actual work after taking in feedback from all three -6. Returns to Pro for a [`precommit`](#5-precommit---pre-commit-validation) review +6. Returns to Pro for a [`precommit`](#6-precommit---pre-commit-validation) review All within a single conversation thread! Gemini Pro in step 6 _knows_ what was recommended by O3 in step 3! Taking that context and review into consideration to aid with its pre-commit review. 
@@ -48,14 +49,15 @@ and review into consideration to aid with its pre-commit review. - **Tools Reference** - [`chat`](#1-chat---general-development-chat--collaborative-thinking) - Collaborative thinking - [`thinkdeep`](#2-thinkdeep---extended-reasoning-partner) - Extended reasoning - - [`consensus`](#3-consensus---multi-model-perspective-gathering) - Multi-model consensus analysis - - [`codereview`](#4-codereview---professional-code-review) - Code review - - [`precommit`](#5-precommit---pre-commit-validation) - Pre-commit validation - - [`debug`](#6-debug---expert-debugging-assistant) - Debugging help - - [`analyze`](#7-analyze---smart-file-analysis) - File analysis - - [`refactor`](#8-refactor---intelligent-code-refactoring) - Code refactoring with decomposition focus - - [`tracer`](#9-tracer---static-code-analysis-prompt-generator) - Call-flow mapping and dependency tracing - - [`testgen`](#10-testgen---comprehensive-test-generation) - Test generation with edge cases + - [`planner`](#3-planner---interactive-sequential-planning) - Interactive sequential planning + - [`consensus`](#4-consensus---multi-model-perspective-gathering) - Multi-model consensus analysis + - [`codereview`](#5-codereview---professional-code-review) - Code review + - [`precommit`](#6-precommit---pre-commit-validation) - Pre-commit validation + - [`debug`](#7-debug---expert-debugging-assistant) - Debugging help + - [`analyze`](#8-analyze---smart-file-analysis) - File analysis + - [`refactor`](#9-refactor---intelligent-code-refactoring) - Code refactoring with decomposition focus + - [`tracer`](#10-tracer---static-code-analysis-prompt-generator) - Call-flow mapping and dependency tracing + - [`testgen`](#11-testgen---comprehensive-test-generation) - Test generation with edge cases - [`your custom tool`](#add-your-own-tools) - Create custom tools for specialized workflows - **Advanced Usage** @@ -263,6 +265,7 @@ Just ask Claude naturally: **Quick Tool Selection Guide:** - **Need a thinking 
partner?** → `chat` (brainstorm ideas, get second opinions, validate approaches) - **Need deeper thinking?** → `thinkdeep` (extends analysis, finds edge cases) +- **Need to break down complex projects?** → `planner` (step-by-step planning, project structure, breaking down complex ideas) - **Need multiple perspectives?** → `consensus` (get diverse expert opinions on proposals and decisions) - **Code needs review?** → `codereview` (bugs, security, performance issues) - **Pre-commit validation?** → `precommit` (validate git changes before committing) @@ -288,16 +291,17 @@ Just ask Claude naturally: **Tools Overview:** 1. [`chat`](docs/tools/chat.md) - Collaborative thinking and development conversations 2. [`thinkdeep`](docs/tools/thinkdeep.md) - Extended reasoning and problem-solving -3. [`consensus`](docs/tools/consensus.md) - Multi-model consensus analysis with stance steering -4. [`codereview`](docs/tools/codereview.md) - Professional code review with severity levels -5. [`precommit`](docs/tools/precommit.md) - Validate git changes before committing -6. [`debug`](docs/tools/debug.md) - Root cause analysis and debugging -7. [`analyze`](docs/tools/analyze.md) - General-purpose file and code analysis -8. [`refactor`](docs/tools/refactor.md) - Code refactoring with decomposition focus -9. [`tracer`](docs/tools/tracer.md) - Static code analysis prompt generator for call-flow mapping -10. [`testgen`](docs/tools/testgen.md) - Comprehensive test generation with edge case coverage -11. [`listmodels`](docs/tools/listmodels.md) - Display all available AI models organized by provider -12. [`version`](docs/tools/version.md) - Get server version and configuration +3. [`planner`](docs/tools/planner.md) - Interactive sequential planning for complex projects +4. [`consensus`](docs/tools/consensus.md) - Multi-model consensus analysis with stance steering +5. [`codereview`](docs/tools/codereview.md) - Professional code review with severity levels +6. 
[`precommit`](docs/tools/precommit.md) - Validate git changes before committing +7. [`debug`](docs/tools/debug.md) - Root cause analysis and debugging +8. [`analyze`](docs/tools/analyze.md) - General-purpose file and code analysis +9. [`refactor`](docs/tools/refactor.md) - Code refactoring with decomposition focus +10. [`tracer`](docs/tools/tracer.md) - Static code analysis prompt generator for call-flow mapping +11. [`testgen`](docs/tools/testgen.md) - Comprehensive test generation with edge case coverage +12. [`listmodels`](docs/tools/listmodels.md) - Display all available AI models organized by provider +13. [`version`](docs/tools/version.md) - Get server version and configuration ### 1. `chat` - General Development Chat & Collaborative Thinking Your thinking partner for brainstorming, getting second opinions, and validating approaches. Perfect for technology comparisons, architecture discussions, and collaborative problem-solving. @@ -318,7 +322,27 @@ and find out what the root cause is **[📖 Read More](docs/tools/thinkdeep.md)** - Enhanced analysis capabilities and critical evaluation process -### 3. `consensus` - Multi-Model Perspective Gathering +### 3. `planner` - Interactive Step-by-Step Planning +Break down complex projects or ideas into manageable, structured plans through step-by-step thinking. +Perfect for adding new features to an existing system, scaling up system design, migration strategies, +and architectural planning with branching and revision capabilities. + +#### Pro Tip +Claude supports `sub-tasks` where it will spawn and run separate background tasks. You can ask Claude to +run Zen's planner with two separate ideas. Then when it's done, use Zen's `consensus` tool to pass the entire +plan and get expert perspective from two powerful AI models on which one to work on first! Like performing **AB** testing +in one-go without the wait! 
+ +``` +Create two separate sub-tasks: in one, using planner tool show me how to add natural language support +to my cooking app. In the other sub-task, use planner to plan how to add support for voice notes to my cooking app. +Once done, start a consensus by sharing both plans to o3 and flash to give me the final verdict. Which one do +I implement first? +``` + +**[📖 Read More](docs/tools/planner.md)** - Step-by-step planning methodology and multi-session continuation + +### 4. `consensus` - Multi-Model Perspective Gathering Get diverse expert opinions from multiple AI models on technical proposals and decisions. Supports stance steering (for/against/neutral) and structured decision-making. ``` @@ -328,7 +352,7 @@ migrate from REST to GraphQL for our API. I need a definitive answer. **[📖 Read More](docs/tools/consensus.md)** - Multi-model orchestration and decision analysis -### 4. `codereview` - Professional Code Review +### 5. `codereview` - Professional Code Review Comprehensive code analysis with prioritized feedback and severity levels. Supports security reviews, performance analysis, and coding standards enforcement. ``` @@ -338,7 +362,7 @@ and there may be more potential vulnerabilities. Find and share related code." **[📖 Read More](docs/tools/codereview.md)** - Professional review capabilities and parallel analysis -### 5. `precommit` - Pre-Commit Validation +### 6. `precommit` - Pre-Commit Validation Comprehensive review of staged/unstaged git changes across multiple repositories. Validates changes against requirements and detects potential regressions. ``` @@ -348,7 +372,7 @@ Perform a thorough precommit with o3, we want to only highlight critical issues, **[📖 Read More](docs/tools/precommit.md)** - Multi-repository validation and change analysis -### 6. `debug` - Expert Debugging Assistant +### 7. `debug` - Expert Debugging Assistant Root cause analysis for complex problems with systematic hypothesis generation. 
Supports error context, stack traces, and structured debugging approaches. ``` @@ -359,7 +383,7 @@ why this is happening and what the root cause is and its fix **[📖 Read More](docs/tools/debug.md)** - Advanced debugging methodologies and troubleshooting -### 7. `analyze` - Smart File Analysis +### 8. `analyze` - Smart File Analysis General-purpose code understanding and exploration. Supports architecture analysis, pattern detection, and comprehensive codebase exploration. ``` @@ -368,7 +392,7 @@ Use gemini to analyze main.py to understand how it works **[📖 Read More](docs/tools/analyze.md)** - Code analysis types and exploration capabilities -### 8. `refactor` - Intelligent Code Refactoring +### 9. `refactor` - Intelligent Code Refactoring Comprehensive refactoring analysis with top-down decomposition strategy. Prioritizes structural improvements and provides precise implementation guidance. ``` @@ -377,7 +401,7 @@ Use gemini pro to decompose my_crazy_big_class.m into smaller extensions **[📖 Read More](docs/tools/refactor.md)** - Refactoring strategy and progressive analysis approach -### 9. `tracer` - Static Code Analysis Prompt Generator +### 10. `tracer` - Static Code Analysis Prompt Generator Creates detailed analysis prompts for call-flow mapping and dependency tracing. Generates structured analysis requests for precision execution flow or dependency mapping. ``` @@ -386,7 +410,7 @@ Use zen tracer to analyze how UserAuthManager.authenticate is used and why **[📖 Read More](docs/tools/tracer.md)** - Prompt generation and analysis modes -### 10. `testgen` - Comprehensive Test Generation +### 11. `testgen` - Comprehensive Test Generation Generates thorough test suites with edge case coverage based on existing code and test framework. Uses multi-agent workflow for realistic failure mode analysis. 
``` @@ -395,7 +419,7 @@ Use zen to generate tests for User.login() method **[📖 Read More](docs/tools/testgen.md)** - Test generation strategy and framework support -### 11. `listmodels` - List Available Models +### 12. `listmodels` - List Available Models Display all available AI models organized by provider, showing capabilities, context windows, and configuration status. ``` @@ -404,7 +428,7 @@ Use zen to list available models **[📖 Read More](docs/tools/listmodels.md)** - Model capabilities and configuration details -### 12. `version` - Server Information +### 13. `version` - Server Information Get server version, configuration details, and system status for debugging and troubleshooting. ``` @@ -422,6 +446,7 @@ Zen supports powerful structured prompts in Claude Code for quick access to tool #### Tool Prompts - `/zen:chat ask local-llama what 2 + 2 is` - Use chat tool with auto-selected model - `/zen:thinkdeep use o3 and tell me why the code isn't working in sorting.swift` - Use thinkdeep tool with auto-selected model +- `/zen:planner break down the microservices migration project into manageable steps` - Use planner tool with auto-selected model - `/zen:consensus use o3:for and flash:against and tell me if adding feature X is a good idea for the project. 
Pass them a summary of what it does.` - Use consensus tool with default configuration - `/zen:codereview review for security module ABC` - Use codereview tool with auto-selected model - `/zen:debug table view is not scrolling properly, very jittery, I suspect the code is in my_controller.m` - Use debug tool with auto-selected model @@ -432,6 +457,7 @@ Zen supports powerful structured prompts in Claude Code for quick access to tool #### Advanced Examples - `/zen:thinkdeeper check if the algorithm in @sort.py is performant and if there are alternatives we could explore` +- `/zen:planner create a step-by-step plan for migrating our authentication system to OAuth2, including dependencies and rollback strategies` - `/zen:consensus debate whether we should migrate to GraphQL for our API` - `/zen:precommit confirm these changes match our requirements in COOL_FEATURE.md` - `/zen:testgen write me tests for class ABC` @@ -440,7 +466,7 @@ Zen supports powerful structured prompts in Claude Code for quick access to tool #### Syntax Format The prompt format is: `/zen:[tool] [your_message]` -- `[tool]` - Any available tool name (chat, thinkdeep, codereview, debug, analyze, consensus, etc.) +- `[tool]` - Any available tool name (chat, thinkdeep, planner, consensus, codereview, debug, analyze, etc.) - `[your_message]` - Your request, question, or instructions for the tool **Note:** All prompts will show as "(MCP) [tool]" in Claude Code to indicate they're provided by the MCP server. 
diff --git a/config.py b/config.py index a5f5af3..80b4fc2 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "4.9.3" +__version__ = "5.0.0" # Last update date in ISO format __updated__ = "2025-06-17" # Primary maintainer diff --git a/docs/tools/planner.md b/docs/tools/planner.md new file mode 100644 index 0000000..548e4c6 --- /dev/null +++ b/docs/tools/planner.md @@ -0,0 +1,83 @@ +# Planner Tool - Interactive Step-by-Step Planning + +**Break down complex projects into manageable, structured plans through step-by-step thinking** + +The `planner` tool helps you break down complex ideas, problems, or projects into multiple manageable steps. Perfect for system design, migration strategies, +architectural planning, and feature development with branching and revision capabilities. + +## How It Works + +The planner tool enables step-by-step thinking with incremental plan building: + +1. **Start with step 1**: Describe the task or problem to plan +2. **Continue building**: Add subsequent steps, building the plan piece by piece +3. **Revise when needed**: Update earlier decisions as new insights emerge +4. **Branch alternatives**: Explore different approaches when multiple options exist +5. **Continue across sessions**: Resume planning later with full context + +## Example Prompts + +#### Pro Tip +Claude supports `sub-tasks` where it will spawn and run separate background tasks. You can ask Claude to +run Zen's planner with two separate ideas. Then when it's done, use Zen's `consensus` tool to pass the entire +plan and get expert perspective from two powerful AI models on which one to work on first! Like performing **AB** testing +in one-go without the wait! 
+ +``` +Create two separate sub-tasks: in one, using planner tool show me how to add natural language support +to my cooking app. In the other sub-task, use planner to plan how to add support for voice notes to my cooking app. +Once done, start a consensus by sharing both plans to o3 and flash to give me the final verdict. Which one do +I implement first? +``` + +``` +Use zen's planner and show me how to add real-time notifications to our mobile app +``` + +``` +Using the planner tool, show me how to add CoreData sync to my app, include any sub-steps +``` + +## Key Features + +- **Step-by-step breakdown**: Build plans incrementally with full context awareness +- **Branching support**: Explore alternative approaches when needed +- **Revision capabilities**: Update earlier decisions as new insights emerge +- **Multi-session continuation**: Resume planning across multiple sessions with context +- **Dynamic adjustment**: Modify step count and approach as planning progresses +- **Visual presentation**: ASCII charts, diagrams, and structured formatting +- **Professional output**: Clean, structured plans without emojis or time estimates + +## More Examples + +``` +Using planner, plan the architecture for a new real-time chat system with 100k concurrent users +``` + +``` +Create a plan using zen for migrating our React app from JavaScript to TypeScript +``` + +``` +Develop a plan using zen for implementing CI/CD pipelines across our development teams +``` + +## Best Practices + +- **Start broad, then narrow**: Begin with high-level strategy, then add implementation details +- **Include constraints**: Consider technical, organizational, and resource limitations +- **Plan for validation**: Include testing and verification steps +- **Think about dependencies**: Identify what needs to happen before each step +- **Consider alternatives**: Note when multiple approaches are viable +- **Enable continuation**: Use continuation_id for multi-session planning + +## Continue With a New 
Plan + +Like all other tools in Zen, you can `continue` with a new plan using the output from a previous plan by simply saying + +``` +Continue with zen's consensus tool and find out what o3:for and flash:against think of the plan +``` + +You can mix and match and take one output and feed it into another, continuing from where you left off using a different +tool / model combination. \ No newline at end of file diff --git a/server.py b/server.py index cde5423..b334ebc 100644 --- a/server.py +++ b/server.py @@ -54,6 +54,7 @@ from tools import ( ConsensusTool, DebugIssueTool, ListModelsTool, + PlannerTool, Precommit, RefactorTool, TestGenerationTool, @@ -161,6 +162,7 @@ TOOLS = { "chat": ChatTool(), # Interactive development chat and brainstorming "consensus": ConsensusTool(), # Multi-model consensus for diverse perspectives on technical proposals "listmodels": ListModelsTool(), # List all available AI models by provider + "planner": PlannerTool(), # A task or problem to plan out as several smaller steps "precommit": Precommit(), # Pre-commit validation of git changes "testgen": TestGenerationTool(), # Comprehensive test generation with edge case coverage "refactor": RefactorTool(), # Intelligent code refactoring suggestions with precise line references @@ -214,6 +216,11 @@ PROMPT_TEMPLATES = { "description": "Trace code execution paths", "template": "Generate tracer analysis with {model}", }, + "planner": { + "name": "planner", + "description": "Break down complex ideas, problems, or projects into multiple manageable steps", + "template": "Create a detailed plan with {model}", + }, "listmodels": { "name": "listmodels", "description": "List available AI models", diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py index 6f1a2df..a4ddbfe 100644 --- a/simulator_tests/__init__.py +++ b/simulator_tests/__init__.py @@ -24,6 +24,8 @@ from .test_ollama_custom_url import OllamaCustomUrlTest from .test_openrouter_fallback import OpenRouterFallbackTest from 
.test_openrouter_models import OpenRouterModelsTest from .test_per_tool_deduplication import PerToolDeduplicationTest +from .test_planner_continuation_history import PlannerContinuationHistoryTest +from .test_planner_validation import PlannerValidationTest from .test_redis_validation import RedisValidationTest from .test_refactor_validation import RefactorValidationTest from .test_testgen_validation import TestGenValidationTest @@ -46,6 +48,8 @@ TEST_REGISTRY = { "ollama_custom_url": OllamaCustomUrlTest, "openrouter_fallback": OpenRouterFallbackTest, "openrouter_models": OpenRouterModelsTest, + "planner_validation": PlannerValidationTest, + "planner_continuation_history": PlannerContinuationHistoryTest, "token_allocation_validation": TokenAllocationValidationTest, "testgen_validation": TestGenValidationTest, "refactor_validation": RefactorValidationTest, @@ -75,6 +79,8 @@ __all__ = [ "OllamaCustomUrlTest", "OpenRouterFallbackTest", "OpenRouterModelsTest", + "PlannerValidationTest", + "PlannerContinuationHistoryTest", "TokenAllocationValidationTest", "TestGenValidationTest", "RefactorValidationTest", diff --git a/simulator_tests/test_planner_continuation_history.py b/simulator_tests/test_planner_continuation_history.py new file mode 100644 index 0000000..463c82d --- /dev/null +++ b/simulator_tests/test_planner_continuation_history.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 +""" +Planner Continuation History Test + +Tests the planner tool's continuation history building across multiple completed planning sessions: +- Multiple completed planning sessions in sequence +- History context loading for new planning sessions +- Proper context building with multiple completed plans +- Context accumulation and retrieval +""" + +import json +from typing import Optional + +from .base_test import BaseSimulatorTest + + +class PlannerContinuationHistoryTest(BaseSimulatorTest): + """Test planner tool's continuation history building across multiple completed sessions""" + + 
@property + def test_name(self) -> str: + return "planner_continuation_history" + + @property + def test_description(self) -> str: + return "Planner tool continuation history building across multiple completed planning sessions" + + def run_test(self) -> bool: + """Test planner continuation history building across multiple completed sessions""" + try: + self.logger.info("Test: Planner continuation history validation") + + # Test 1: Complete first planning session (microservices migration) + if not self._test_first_planning_session(): + return False + + # Test 2: Complete second planning session with context from first + if not self._test_second_planning_session(): + return False + + # Test 3: Complete third planning session with context from both previous + if not self._test_third_planning_session(): + return False + + # Test 4: Validate context accumulation across all sessions + if not self._test_context_accumulation(): + return False + + self.logger.info(" ✅ All planner continuation history tests passed") + return True + + except Exception as e: + self.logger.error(f"Planner continuation history test failed: {e}") + return False + + def _test_first_planning_session(self) -> bool: + """Complete first planning session - microservices migration""" + try: + self.logger.info(" 2.1: First planning session - Microservices Migration") + + # Step 1: Start migration planning + self.logger.info(" 2.1.1: Start migration planning") + response1, continuation_id = self.call_mcp_tool( + "planner", + { + "step": "I need to plan a microservices migration for our monolithic e-commerce platform. 
Let me analyze the current monolith structure.", + "step_number": 1, + "total_steps": 3, + "next_step_required": True, + }, + ) + + if not response1 or not continuation_id: + self.logger.error("Failed to start first planning session") + return False + + # Step 2: Domain identification + self.logger.info(" 2.1.2: Domain identification") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "I've identified key domains: User Management, Product Catalog, Order Processing, Payment, and Inventory. Each will become a separate microservice.", + "step_number": 2, + "total_steps": 3, + "next_step_required": True, + "continuation_id": continuation_id, + }, + ) + + if not response2: + self.logger.error("Failed step 2 of first planning session") + return False + + # Step 3: Complete migration plan + self.logger.info(" 2.1.3: Complete migration plan") + response3, _ = self.call_mcp_tool( + "planner", + { + "step": "Migration strategy: Phase 1 - Extract User Management service, Phase 2 - Product Catalog and Inventory services, Phase 3 - Order Processing and Payment services. 
Use API Gateway for service coordination.", + "step_number": 3, + "total_steps": 3, + "next_step_required": False, # Complete the session + "continuation_id": continuation_id, + }, + ) + + if not response3: + self.logger.error("Failed to complete first planning session") + return False + + # Validate completion + response3_data = self._parse_planner_response(response3) + if not response3_data.get("planning_complete"): + self.logger.error("First planning session not marked as complete") + return False + + if not response3_data.get("plan_summary"): + self.logger.error("First planning session missing plan summary") + return False + + self.logger.info(" ✅ First planning session completed successfully") + + # Store for next test + self.first_continuation_id = continuation_id + return True + + except Exception as e: + self.logger.error(f"First planning session test failed: {e}") + return False + + def _test_second_planning_session(self) -> bool: + """Complete second planning session with context from first""" + try: + self.logger.info(" 2.2: Second planning session - Database Strategy") + + # Step 1: Start database planning with previous context + self.logger.info(" 2.2.1: Start database strategy with microservices context") + response1, new_continuation_id = self.call_mcp_tool( + "planner", + { + "step": "Now I need to plan the database strategy for the microservices architecture. 
I'll design how each service will manage its data.", + "step_number": 1, + "total_steps": 2, + "next_step_required": True, + "continuation_id": self.first_continuation_id, # Use first session's continuation_id + }, + ) + + if not response1 or not new_continuation_id: + self.logger.error("Failed to start second planning session") + return False + + # Validate context loading + response1_data = self._parse_planner_response(response1) + if "previous_plan_context" not in response1_data: + self.logger.error("Second session should load context from first completed session") + return False + + # Check context contains migration content + context = response1_data["previous_plan_context"].lower() + if "migration" not in context and "microservices" not in context: + self.logger.error("Context should contain migration/microservices content from first session") + return False + + self.logger.info(" ✅ Second session loaded context from first completed session") + + # Step 2: Complete database plan + self.logger.info(" 2.2.2: Complete database strategy") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "Database strategy: Each microservice gets its own database (database-per-service pattern). Use event sourcing for cross-service communication and eventual consistency. 
Implement CQRS for read/write separation.", + "step_number": 2, + "total_steps": 2, + "next_step_required": False, # Complete the session + "continuation_id": new_continuation_id, + }, + ) + + if not response2: + self.logger.error("Failed to complete second planning session") + return False + + # Validate completion + response2_data = self._parse_planner_response(response2) + if not response2_data.get("planning_complete"): + self.logger.error("Second planning session not marked as complete") + return False + + self.logger.info(" ✅ Second planning session completed successfully") + + # Store for next test + self.second_continuation_id = new_continuation_id + return True + + except Exception as e: + self.logger.error(f"Second planning session test failed: {e}") + return False + + def _test_third_planning_session(self) -> bool: + """Complete third planning session with context from both previous""" + try: + self.logger.info(" 2.3: Third planning session - Deployment Strategy") + + # Step 1: Start deployment planning with accumulated context + self.logger.info(" 2.3.1: Start deployment strategy with accumulated context") + response1, new_continuation_id = self.call_mcp_tool( + "planner", + { + "step": "Now I need to plan the deployment strategy that supports both the microservices architecture and the database strategy. 
I'll design the infrastructure and deployment pipeline.", + "step_number": 1, + "total_steps": 2, + "next_step_required": True, + "continuation_id": self.second_continuation_id, # Use second session's continuation_id + }, + ) + + if not response1 or not new_continuation_id: + self.logger.error("Failed to start third planning session") + return False + + # Validate context loading + response1_data = self._parse_planner_response(response1) + if "previous_plan_context" not in response1_data: + self.logger.error("Third session should load context from previous completed sessions") + return False + + # Check context contains content from most recent completed session + context = response1_data["previous_plan_context"].lower() + expected_terms = ["database", "event sourcing", "cqrs"] + found_terms = [term for term in expected_terms if term in context] + + if len(found_terms) == 0: + self.logger.error( + f"Context should contain database strategy content from second session. Context: {context[:200]}..." + ) + return False + + self.logger.info(" ✅ Third session loaded context from most recent completed session") + + # Step 2: Complete deployment plan + self.logger.info(" 2.3.2: Complete deployment strategy") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "Deployment strategy: Use Kubernetes for container orchestration with Helm charts. Implement CI/CD pipeline with GitOps. Use service mesh (Istio) for traffic management, monitoring, and security. 
Deploy databases in separate namespaces with backup automation.", + "step_number": 2, + "total_steps": 2, + "next_step_required": False, # Complete the session + "continuation_id": new_continuation_id, + }, + ) + + if not response2: + self.logger.error("Failed to complete third planning session") + return False + + # Validate completion + response2_data = self._parse_planner_response(response2) + if not response2_data.get("planning_complete"): + self.logger.error("Third planning session not marked as complete") + return False + + self.logger.info(" ✅ Third planning session completed successfully") + + # Store for final test + self.third_continuation_id = new_continuation_id + return True + + except Exception as e: + self.logger.error(f"Third planning session test failed: {e}") + return False + + def _test_context_accumulation(self) -> bool: + """Test that context properly accumulates across multiple completed sessions""" + try: + self.logger.info(" 2.4: Testing context accumulation across all sessions") + + # Start a new planning session that should load context from the most recent completed session + self.logger.info(" 2.4.1: Start monitoring planning with full context history") + response1, _ = self.call_mcp_tool( + "planner", + { + "step": "Finally, I need to plan the monitoring and observability strategy that works with the microservices, database, and deployment architecture.", + "step_number": 1, + "total_steps": 1, + "next_step_required": False, + "continuation_id": self.third_continuation_id, # Use third session's continuation_id + }, + ) + + if not response1: + self.logger.error("Failed to start monitoring planning session") + return False + + # Validate context loading + response1_data = self._parse_planner_response(response1) + if "previous_plan_context" not in response1_data: + self.logger.error("Final session should load context from previous completed sessions") + return False + + # Validate context contains most recent completed session content + 
context = response1_data["previous_plan_context"].lower() + + # Should contain deployment strategy content (most recent) + deployment_terms = ["kubernetes", "deployment", "istio", "gitops"] + found_deployment_terms = [term for term in deployment_terms if term in context] + + if len(found_deployment_terms) == 0: + self.logger.error(f"Context should contain deployment strategy content. Context: {context[:300]}...") + return False + + self.logger.info(" ✅ Context accumulation working correctly") + + # Validate this creates a complete planning session + if not response1_data.get("planning_complete"): + self.logger.error("Final planning session should be marked as complete") + return False + + self.logger.info(" ✅ Context accumulation test completed successfully") + return True + + except Exception as e: + self.logger.error(f"Context accumulation test failed: {e}") + return False + + def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: + """Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling""" + # Use parent implementation to get the raw response + response_text, _ = super().call_mcp_tool(tool_name, params) + + if not response_text: + return None, None + + # Extract continuation_id from planner response specifically + continuation_id = self._extract_planner_continuation_id(response_text) + + return response_text, continuation_id + + def _extract_planner_continuation_id(self, response_text: str) -> Optional[str]: + """Extract continuation_id from planner response""" + try: + # Parse the response - it's now direct JSON, not wrapped + response_data = json.loads(response_text) + return response_data.get("continuation_id") + + except json.JSONDecodeError as e: + self.logger.debug(f"Failed to parse response for planner continuation_id: {e}") + return None + + def _parse_planner_response(self, response_text: str) -> dict: + """Parse planner tool JSON response""" + try: + # Parse the response 
- it's now direct JSON, not wrapped + return json.loads(response_text) + + except json.JSONDecodeError as e: + self.logger.error(f"Failed to parse planner response as JSON: {e}") + self.logger.error(f"Response text: {response_text[:500]}...") + return {} diff --git a/simulator_tests/test_planner_validation.py b/simulator_tests/test_planner_validation.py new file mode 100644 index 0000000..d00b0c5 --- /dev/null +++ b/simulator_tests/test_planner_validation.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python3 +""" +Planner Tool Validation Test + +Tests the planner tool's sequential planning capabilities including: +- Step-by-step planning with proper JSON responses +- Continuation logic across planning sessions +- Branching and revision capabilities +- Previous plan context loading +- Plan completion and summary storage +""" + +import json +from typing import Optional + +from .base_test import BaseSimulatorTest + + +class PlannerValidationTest(BaseSimulatorTest): + """Test planner tool's sequential planning and continuation features""" + + @property + def test_name(self) -> str: + return "planner_validation" + + @property + def test_description(self) -> str: + return "Planner tool sequential planning and continuation validation" + + def run_test(self) -> bool: + """Test planner tool sequential planning capabilities""" + try: + self.logger.info("Test: Planner tool validation") + + # Test 1: Single planning session with multiple steps + if not self._test_single_planning_session(): + return False + + # Test 2: Plan completion and continuation to new planning session + if not self._test_plan_continuation(): + return False + + # Test 3: Branching and revision capabilities + if not self._test_branching_and_revision(): + return False + + self.logger.info(" ✅ All planner validation tests passed") + return True + + except Exception as e: + self.logger.error(f"Planner validation test failed: {e}") + return False + + def _test_single_planning_session(self) -> bool: + """Test a complete 
planning session with multiple steps""" + try: + self.logger.info(" 1.1: Testing single planning session") + + # Step 1: Start planning + self.logger.info(" 1.1.1: Step 1 - Initial planning step") + response1, continuation_id = self.call_mcp_tool( + "planner", + { + "step": "I need to plan a microservices migration for our monolithic e-commerce platform. Let me start by understanding the current architecture and identifying the key business domains.", + "step_number": 1, + "total_steps": 5, + "next_step_required": True, + }, + ) + + if not response1 or not continuation_id: + self.logger.error("Failed to get initial planning response") + return False + + # Parse and validate JSON response + response1_data = self._parse_planner_response(response1) + if not response1_data: + return False + + # Validate step 1 response structure + if not self._validate_step_response(response1_data, 1, 5, True, "planning_success"): + return False + + self.logger.info(f" ✅ Step 1 successful, continuation_id: {continuation_id}") + + # Step 2: Continue planning + self.logger.info(" 1.1.2: Step 2 - Domain identification") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "Based on my analysis, I can identify the main business domains: User Management, Product Catalog, Order Processing, Payment, and Inventory. 
Let me plan how to extract these into separate services.", + "step_number": 2, + "total_steps": 5, + "next_step_required": True, + "continuation_id": continuation_id, + }, + ) + + if not response2: + self.logger.error("Failed to continue planning to step 2") + return False + + response2_data = self._parse_planner_response(response2) + if not self._validate_step_response(response2_data, 2, 5, True, "planning_success"): + return False + + self.logger.info(" ✅ Step 2 successful") + + # Step 3: Final step + self.logger.info(" 1.1.3: Step 3 - Final planning step") + response3, _ = self.call_mcp_tool( + "planner", + { + "step": "Now I'll create a phased migration strategy: Phase 1 - Extract User Management, Phase 2 - Product Catalog and Inventory, Phase 3 - Order Processing and Payment services. This completes the initial migration plan.", + "step_number": 3, + "total_steps": 3, # Adjusted total + "next_step_required": False, # Final step + "continuation_id": continuation_id, + }, + ) + + if not response3: + self.logger.error("Failed to complete planning session") + return False + + response3_data = self._parse_planner_response(response3) + if not self._validate_final_step_response(response3_data, 3, 3): + return False + + self.logger.info(" ✅ Planning session completed successfully") + + # Store continuation_id for next test + self.migration_continuation_id = continuation_id + return True + + except Exception as e: + self.logger.error(f"Single planning session test failed: {e}") + return False + + def _test_plan_continuation(self) -> bool: + """Test continuing from a previous completed plan""" + try: + self.logger.info(" 1.2: Testing plan continuation with previous context") + + # Start a new planning session using the continuation_id from previous completed plan + self.logger.info(" 1.2.1: New planning session with previous plan context") + response1, new_continuation_id = self.call_mcp_tool( + "planner", + { + "step": "Now that I have the microservices migration plan, 
let me plan the database strategy. I need to decide how to handle data consistency across the new services.", + "step_number": 1, # New planning session starts at step 1 + "total_steps": 4, + "next_step_required": True, + "continuation_id": self.migration_continuation_id, # Use previous plan's continuation_id + }, + ) + + if not response1 or not new_continuation_id: + self.logger.error("Failed to start new planning session with context") + return False + + response1_data = self._parse_planner_response(response1) + if not response1_data: + return False + + # Should have previous plan context + if "previous_plan_context" not in response1_data: + self.logger.error("Expected previous_plan_context in new planning session") + return False + + # Check for key terms from the previous plan + context = response1_data["previous_plan_context"].lower() + if "migration" not in context and "plan" not in context: + self.logger.error("Previous plan context doesn't contain expected content") + return False + + self.logger.info(" ✅ New planning session loaded previous plan context") + + # Continue the new planning session (step 2+ should NOT load context) + self.logger.info(" 1.2.2: Continue new planning session (no context loading)") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "I'll implement a database-per-service pattern with eventual consistency using event sourcing for cross-service communication.", + "step_number": 2, + "total_steps": 4, + "next_step_required": True, + "continuation_id": new_continuation_id, # Same continuation, step 2 + }, + ) + + if not response2: + self.logger.error("Failed to continue new planning session") + return False + + response2_data = self._parse_planner_response(response2) + if not response2_data: + return False + + # Step 2+ should NOT have previous_plan_context (only step 1 with continuation_id gets context) + if "previous_plan_context" in response2_data: + self.logger.error("Step 2 should NOT have previous_plan_context") + 
return False + + self.logger.info(" ✅ Step 2 correctly has no previous context (as expected)") + return True + + except Exception as e: + self.logger.error(f"Plan continuation test failed: {e}") + return False + + def _test_branching_and_revision(self) -> bool: + """Test branching and revision capabilities""" + try: + self.logger.info(" 1.3: Testing branching and revision capabilities") + + # Start a new planning session for testing branching + self.logger.info(" 1.3.1: Start planning session for branching test") + response1, continuation_id = self.call_mcp_tool( + "planner", + { + "step": "Let me plan the deployment strategy for the microservices. I'll consider different deployment options.", + "step_number": 1, + "total_steps": 4, + "next_step_required": True, + }, + ) + + if not response1 or not continuation_id: + self.logger.error("Failed to start branching test planning session") + return False + + # Test branching + self.logger.info(" 1.3.2: Create a branch from step 1") + response2, _ = self.call_mcp_tool( + "planner", + { + "step": "Branch A: I'll explore Kubernetes deployment with service mesh (Istio) for advanced traffic management and observability.", + "step_number": 2, + "total_steps": 4, + "next_step_required": True, + "is_branch_point": True, + "branch_from_step": 1, + "branch_id": "kubernetes-istio", + "continuation_id": continuation_id, + }, + ) + + if not response2: + self.logger.error("Failed to create branch") + return False + + response2_data = self._parse_planner_response(response2) + if not response2_data: + return False + + # Validate branching metadata + metadata = response2_data.get("metadata", {}) + if not metadata.get("is_branch_point"): + self.logger.error("Branch point not properly recorded in metadata") + return False + + if metadata.get("branch_id") != "kubernetes-istio": + self.logger.error("Branch ID not properly recorded") + return False + + if "kubernetes-istio" not in metadata.get("branches", []): + self.logger.error("Branch not 
recorded in branches list") + return False + + self.logger.info(" ✅ Branching working correctly") + + # Test revision + self.logger.info(" 1.3.3: Revise step 2") + response3, _ = self.call_mcp_tool( + "planner", + { + "step": "Revision: Actually, let me revise the Kubernetes approach. I'll use a simpler Docker Swarm deployment initially, then migrate to Kubernetes later.", + "step_number": 3, + "total_steps": 4, + "next_step_required": True, + "is_step_revision": True, + "revises_step_number": 2, + "continuation_id": continuation_id, + }, + ) + + if not response3: + self.logger.error("Failed to create revision") + return False + + response3_data = self._parse_planner_response(response3) + if not response3_data: + return False + + # Validate revision metadata + metadata = response3_data.get("metadata", {}) + if not metadata.get("is_step_revision"): + self.logger.error("Step revision not properly recorded in metadata") + return False + + if metadata.get("revises_step_number") != 2: + self.logger.error("Revised step number not properly recorded") + return False + + self.logger.info(" ✅ Revision working correctly") + return True + + except Exception as e: + self.logger.error(f"Branching and revision test failed: {e}") + return False + + def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: + """Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling""" + # Use parent implementation to get the raw response + response_text, _ = super().call_mcp_tool(tool_name, params) + + if not response_text: + return None, None + + # Extract continuation_id from planner response specifically + continuation_id = self._extract_planner_continuation_id(response_text) + + return response_text, continuation_id + + def _extract_planner_continuation_id(self, response_text: str) -> Optional[str]: + """Extract continuation_id from planner response""" + try: + # Parse the response - it's now direct JSON, not wrapped + 
response_data = json.loads(response_text) + return response_data.get("continuation_id") + + except json.JSONDecodeError as e: + self.logger.debug(f"Failed to parse response for planner continuation_id: {e}") + return None + + def _parse_planner_response(self, response_text: str) -> dict: + """Parse planner tool JSON response""" + try: + # Parse the response - it's now direct JSON, not wrapped + return json.loads(response_text) + + except json.JSONDecodeError as e: + self.logger.error(f"Failed to parse planner response as JSON: {e}") + self.logger.error(f"Response text: {response_text[:500]}...") + return {} + + def _validate_step_response( + self, + response_data: dict, + expected_step: int, + expected_total: int, + expected_next_required: bool, + expected_status: str, + ) -> bool: + """Validate a planning step response structure""" + try: + # Check status + if response_data.get("status") != expected_status: + self.logger.error(f"Expected status '{expected_status}', got '{response_data.get('status')}'") + return False + + # Check step number + if response_data.get("step_number") != expected_step: + self.logger.error(f"Expected step_number {expected_step}, got {response_data.get('step_number')}") + return False + + # Check total steps + if response_data.get("total_steps") != expected_total: + self.logger.error(f"Expected total_steps {expected_total}, got {response_data.get('total_steps')}") + return False + + # Check next_step_required + if response_data.get("next_step_required") != expected_next_required: + self.logger.error( + f"Expected next_step_required {expected_next_required}, got {response_data.get('next_step_required')}" + ) + return False + + # Check that step_content exists + if not response_data.get("step_content"): + self.logger.error("Missing step_content in response") + return False + + # Check metadata exists + if "metadata" not in response_data: + self.logger.error("Missing metadata in response") + return False + + # Check next_steps guidance + if 
not response_data.get("next_steps"): + self.logger.error("Missing next_steps guidance in response") + return False + + return True + + except Exception as e: + self.logger.error(f"Error validating step response: {e}") + return False + + def _validate_final_step_response(self, response_data: dict, expected_step: int, expected_total: int) -> bool: + """Validate a final planning step response""" + try: + # Basic step validation + if not self._validate_step_response( + response_data, expected_step, expected_total, False, "planning_success" + ): + return False + + # Check planning_complete flag + if not response_data.get("planning_complete"): + self.logger.error("Expected planning_complete=true for final step") + return False + + # Check plan_summary exists + if not response_data.get("plan_summary"): + self.logger.error("Missing plan_summary in final step") + return False + + # Check plan_summary contains expected content + plan_summary = response_data.get("plan_summary", "") + if "COMPLETE PLAN:" not in plan_summary: + self.logger.error("plan_summary doesn't contain 'COMPLETE PLAN:' marker") + return False + + # Check next_steps mentions completion + next_steps = response_data.get("next_steps", "") + if "complete" not in next_steps.lower(): + self.logger.error("next_steps doesn't indicate planning completion") + return False + + return True + + except Exception as e: + self.logger.error(f"Error validating final step response: {e}") + return False diff --git a/systemprompts/__init__.py b/systemprompts/__init__.py index 1568f7a..1e5047d 100644 --- a/systemprompts/__init__.py +++ b/systemprompts/__init__.py @@ -7,6 +7,7 @@ from .chat_prompt import CHAT_PROMPT from .codereview_prompt import CODEREVIEW_PROMPT from .consensus_prompt import CONSENSUS_PROMPT from .debug_prompt import DEBUG_ISSUE_PROMPT +from .planner_prompt import PLANNER_PROMPT from .precommit_prompt import PRECOMMIT_PROMPT from .refactor_prompt import REFACTOR_PROMPT from .testgen_prompt import 
TESTGEN_PROMPT @@ -19,6 +20,7 @@ __all__ = [ "ANALYZE_PROMPT", "CHAT_PROMPT", "CONSENSUS_PROMPT", + "PLANNER_PROMPT", "PRECOMMIT_PROMPT", "REFACTOR_PROMPT", "TESTGEN_PROMPT", diff --git a/systemprompts/planner_prompt.py b/systemprompts/planner_prompt.py new file mode 100644 index 0000000..cf3c694 --- /dev/null +++ b/systemprompts/planner_prompt.py @@ -0,0 +1,124 @@ +""" +Planner tool system prompts +""" + +PLANNER_PROMPT = """ +You are an expert, seasoned planning consultant and systems architect with deep expertise in plan structuring, risk assessment, +and software development strategy. You have extensive experience organizing complex projects, guiding technical implementations, +and maintaining a sharp understanding of both your own and competing products across the market. From microservices +to global-scale deployments, your technical insight and architectural knowledge are unmatched. There is nothing related +to software and software development that you're not aware of. All the latest frameworks, languages, trends, and techniques +are areas you have mastery in. Your role is to critically evaluate and refine plans to make them more robust, +efficient, and implementation-ready. + +CRITICAL LINE NUMBER INSTRUCTIONS +Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be +included in any code you generate. Always reference specific line numbers so Claude can locate +exact positions when needed. Include a very short code excerpt alongside for clarity. +Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code +snippets.
 + +IF MORE INFORMATION IS NEEDED +If Claude is discussing specific code, functions, or project components that were not given as part of the context, +and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful +collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been +provided unless for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "<your question>", + "files_needed": ["[file name here]", "[or some folder/]"]} + +PLANNING METHODOLOGY: + +1. DECOMPOSITION: Break down the main objective into logical, sequential steps +2. DEPENDENCIES: Identify which steps depend on others and order them appropriately +3. BRANCHING: When multiple valid approaches exist, create branches to explore alternatives +4. ITERATION: Be willing to step back and refine earlier steps if new insights emerge +5. COMPLETENESS: Ensure all aspects of the task are covered without gaps + +STEP STRUCTURE: +Each step in your plan MUST include: +- Step number and branch identifier (if branching) +- Clear, actionable description +- Prerequisites or dependencies +- Expected outcomes +- Potential challenges or considerations +- Alternative approaches (when applicable) + +BRANCHING GUIDELINES: +- Use branches to explore different implementation strategies +- Label branches clearly (e.g., "Branch A: Microservices approach", "Branch B: Monolithic approach") +- Explain when and why to choose each branch +- Show how branches might reconverge + +PLANNING PRINCIPLES: +- Start with high-level strategy, then add implementation details +- Consider technical, organizational, and resource constraints +- Include validation and testing steps +- Plan for error handling and rollback scenarios +- Think about maintenance and future extensibility + +STRUCTURED JSON OUTPUT FORMAT: +You MUST respond with a properly formatted JSON object following this exact schema.
+Do NOT include any text before or after the JSON. The response must be valid JSON only. + +IF MORE INFORMATION IS NEEDED: +If you lack critical information to proceed with planning, you MUST only respond with: +{ + "status": "clarification_required", + "question": "<your question>", + "files_needed": ["<file name here>", "<or some folder/>"] +} + +FOR NORMAL PLANNING RESPONSES: + +{ + "status": "planning_success", + "step_number": <current step number>, + "total_steps": <estimated total steps>, + "next_step_required": <true or false>, + "step_content": "<detailed planning analysis for this step>", + "metadata": { + "branches": ["<list of branch IDs>"], + "step_history_length": <number of steps recorded so far>, + "is_step_revision": <true or false>, + "revises_step_number": <step number being revised, or null>, + "is_branch_point": <true or false>, + "branch_from_step": <step number this branches from, or null>, + "branch_id": "<branch identifier, or null>", + "more_steps_needed": <true or false> + }, + "continuation_id": "<thread continuation ID>", + "planning_complete": <true or false>, + "plan_summary": "<complete plan summary - only when planning_complete is true>", + "next_steps": "<guidance for Claude on what to do next>", + "previous_plan_context": "<context from previous completed plans - only on step 1 with continuation_id>" +} + +PLANNING CONTENT GUIDELINES: +- step_content: Provide detailed planning analysis for the current step +- Include specific actions, prerequisites, outcomes, and considerations +- When branching, clearly explain the alternative approach and when to use it +- When completing planning, provide comprehensive plan_summary +- next_steps: Always guide Claude on what to do next (continue planning, implement, or branch) + +PLAN PRESENTATION GUIDELINES: +When planning is complete (planning_complete: true), Claude should present the final plan with: +- Clear headings and numbered phases/sections +- Visual elements like ASCII charts for workflows, dependencies, or sequences +- Bullet points and sub-steps for detailed breakdowns +- Implementation guidance and next steps +- Visual organization (boxes, arrows, diagrams) for complex relationships +- Tables for comparisons or resource allocation +- Priority indicators and sequence information where relevant + +IMPORTANT: Do NOT use emojis in plan presentations. Use clear text formatting, ASCII characters, and symbols only. +IMPORTANT: Do NOT mention time estimates, costs, or pricing unless explicitly requested by the user.
+ +Example visual elements to use: +- Phase diagrams: Phase 1 → Phase 2 → Phase 3 +- Dependency charts: A ← B ← C (C depends on B, B depends on A) +- Sequence boxes: [Phase 1: Setup] → [Phase 2: Development] → [Phase 3: Testing] +- Decision trees for branching strategies +- Resource allocation tables + +Be thorough, practical, and consider edge cases. Your planning should be detailed enough that someone could follow it step-by-step to achieve the goal. +""" diff --git a/tests/test_planner.py b/tests/test_planner.py new file mode 100644 index 0000000..5f9561f --- /dev/null +++ b/tests/test_planner.py @@ -0,0 +1,413 @@ +""" +Tests for the planner tool. +""" + +from unittest.mock import patch + +import pytest + +from tools.models import ToolModelCategory +from tools.planner import PlannerRequest, PlannerTool + + +class TestPlannerTool: + """Test suite for PlannerTool.""" + + def test_tool_metadata(self): + """Test basic tool metadata and configuration.""" + tool = PlannerTool() + + assert tool.get_name() == "planner" + assert "SEQUENTIAL PLANNER" in tool.get_description() + assert tool.get_default_temperature() == 0.5 # TEMPERATURE_BALANCED + assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING + assert tool.get_default_thinking_mode() == "high" + + def test_request_validation(self): + """Test Pydantic request model validation.""" + # Valid interactive step request + step_request = PlannerRequest( + step="Create database migration scripts", step_number=3, total_steps=10, next_step_required=True + ) + assert step_request.step == "Create database migration scripts" + assert step_request.step_number == 3 + assert step_request.next_step_required is True + assert step_request.is_step_revision is False # default + + # Missing required fields should fail + with pytest.raises(ValueError): + PlannerRequest() # Missing all required fields + + with pytest.raises(ValueError): + PlannerRequest(step="test") # Missing other required fields + + def 
test_input_schema_generation(self): + """Test JSON schema generation for MCP client.""" + tool = PlannerTool() + schema = tool.get_input_schema() + + assert schema["type"] == "object" + # Interactive planning fields + assert "step" in schema["properties"] + assert "step_number" in schema["properties"] + assert "total_steps" in schema["properties"] + assert "next_step_required" in schema["properties"] + assert "is_step_revision" in schema["properties"] + assert "is_branch_point" in schema["properties"] + assert "branch_id" in schema["properties"] + assert "continuation_id" in schema["properties"] + + # Check excluded fields are NOT present + assert "model" not in schema["properties"] + assert "images" not in schema["properties"] + assert "files" not in schema["properties"] + assert "temperature" not in schema["properties"] + assert "thinking_mode" not in schema["properties"] + assert "use_websearch" not in schema["properties"] + + # Check required fields + assert "step" in schema["required"] + assert "step_number" in schema["required"] + assert "total_steps" in schema["required"] + assert "next_step_required" in schema["required"] + + def test_model_category_for_planning(self): + """Test that planner uses extended reasoning category.""" + tool = PlannerTool() + category = tool.get_model_category() + + # Planning needs deep thinking + assert category == ToolModelCategory.EXTENDED_REASONING + + @pytest.mark.asyncio + async def test_execute_first_step(self): + """Test execute method for first planning step.""" + tool = PlannerTool() + arguments = { + "step": "Plan a microservices migration for our monolithic e-commerce platform", + "step_number": 1, + "total_steps": 10, + "next_step_required": True, + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.create_thread", return_value="test-uuid-123"): + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + 
assert len(result) == 1 + assert result[0].type == "text" + + # Parse the JSON response + import json + + parsed_response = json.loads(result[0].text) + + assert parsed_response["step_number"] == 1 + assert parsed_response["total_steps"] == 10 + assert parsed_response["next_step_required"] is True + assert parsed_response["continuation_id"] == "test-uuid-123" + assert parsed_response["status"] == "planning_success" + + @pytest.mark.asyncio + async def test_execute_subsequent_step(self): + """Test execute method for subsequent planning step.""" + tool = PlannerTool() + arguments = { + "step": "Set up Docker containers for each microservice", + "step_number": 2, + "total_steps": 8, + "next_step_required": True, + "continuation_id": "existing-uuid-456", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + assert result[0].type == "text" + + # Parse the JSON response + import json + + parsed_response = json.loads(result[0].text) + + assert parsed_response["step_number"] == 2 + assert parsed_response["total_steps"] == 8 + assert parsed_response["next_step_required"] is True + assert parsed_response["continuation_id"] == "existing-uuid-456" + assert parsed_response["status"] == "planning_success" + + @pytest.mark.asyncio + async def test_execute_with_continuation_context(self): + """Test execute method with continuation that loads previous context.""" + tool = PlannerTool() + arguments = { + "step": "Continue planning the deployment phase", + "step_number": 1, # Step 1 with continuation_id loads context + "total_steps": 8, + "next_step_required": True, + "continuation_id": "test-continuation-id", + } + + # Mock thread with completed plan + from utils.conversation_memory import ConversationTurn, ThreadContext + + mock_turn = ConversationTurn( + role="assistant", + content='{"status": "planning_success", 
"planning_complete": true, "plan_summary": "COMPLETE PLAN: Authentication system with 3 steps completed"}', + tool_name="planner", + model_name="claude-planner", + timestamp="2024-01-01T00:00:00Z", + ) + mock_thread = ThreadContext( + thread_id="test-id", + tool_name="planner", + turns=[mock_turn], + created_at="2024-01-01T00:00:00Z", + last_updated_at="2024-01-01T00:00:00Z", + initial_context={}, + ) + + with patch("utils.conversation_memory.get_thread", return_value=mock_thread): + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + response_text = result[0].text + + # Should include previous plan context in JSON + import json + + parsed_response = json.loads(response_text) + + # Check for previous plan context in the structured response + assert "previous_plan_context" in parsed_response + assert "Authentication system" in parsed_response["previous_plan_context"] + + @pytest.mark.asyncio + async def test_execute_final_step(self): + """Test execute method for final planning step.""" + tool = PlannerTool() + arguments = { + "step": "Deploy and monitor the new system", + "step_number": 10, + "total_steps": 10, + "next_step_required": False, # Final step + "continuation_id": "test-uuid-789", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + response_text = result[0].text + + # Parse the structured JSON response + import json + + parsed_response = json.loads(response_text) + + # Check final step structure + assert parsed_response["status"] == "planning_success" + assert parsed_response["step_number"] == 10 + assert parsed_response["planning_complete"] is True + assert "plan_summary" in parsed_response + assert "COMPLETE PLAN:" in parsed_response["plan_summary"] + + @pytest.mark.asyncio + async def 
test_execute_with_branching(self): + """Test execute method with branching.""" + tool = PlannerTool() + arguments = { + "step": "Use Kubernetes for orchestration", + "step_number": 4, + "total_steps": 10, + "next_step_required": True, + "is_branch_point": True, + "branch_from_step": 3, + "branch_id": "cloud-native-path", + "continuation_id": "test-uuid-branch", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + assert parsed_response["metadata"]["branches"] == ["cloud-native-path"] + assert "cloud-native-path" in str(tool.branches) + + @pytest.mark.asyncio + async def test_execute_with_revision(self): + """Test execute method with step revision.""" + tool = PlannerTool() + arguments = { + "step": "Revise API design to use GraphQL instead of REST", + "step_number": 3, + "total_steps": 8, + "next_step_required": True, + "is_step_revision": True, + "revises_step_number": 2, + "continuation_id": "test-uuid-revision", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + assert parsed_response["step_number"] == 3 + assert parsed_response["next_step_required"] is True + assert parsed_response["metadata"]["is_step_revision"] is True + assert parsed_response["metadata"]["revises_step_number"] == 2 + + # Check that step data was stored in history + assert len(tool.step_history) > 0 + latest_step = tool.step_history[-1] + assert latest_step["is_step_revision"] is True + assert latest_step["revises_step_number"] == 2 
+ + @pytest.mark.asyncio + async def test_execute_adjusts_total_steps(self): + """Test execute method adjusts total steps when current step exceeds estimate.""" + tool = PlannerTool() + arguments = { + "step": "Additional step discovered during planning", + "step_number": 8, + "total_steps": 5, # Current step exceeds total + "next_step_required": True, + "continuation_id": "test-uuid-adjust", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.add_turn"): + result = await tool.execute(arguments) + + # Should return a list with TextContent + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + # Total steps should be adjusted to match current step + assert parsed_response["total_steps"] == 8 + assert parsed_response["step_number"] == 8 + assert parsed_response["status"] == "planning_success" + + @pytest.mark.asyncio + async def test_execute_error_handling(self): + """Test execute method error handling.""" + tool = PlannerTool() + # Invalid arguments - missing required fields + arguments = { + "step": "Invalid request" + # Missing required fields: step_number, total_steps, next_step_required + } + + result = await tool.execute(arguments) + + # Should return error response + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + assert parsed_response["status"] == "planning_failed" + assert "error" in parsed_response + + @pytest.mark.asyncio + async def test_execute_step_history_tracking(self): + """Test that execute method properly tracks step history.""" + tool = PlannerTool() + + # Execute multiple steps + step1_args = {"step": "First step", "step_number": 1, "total_steps": 3, "next_step_required": True} + + step2_args = { + "step": "Second step", + "step_number": 2, + "total_steps": 3, + "next_step_required": True, + "continuation_id": 
"test-uuid-history", + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.create_thread", return_value="test-uuid-history"): + with patch("utils.conversation_memory.add_turn"): + await tool.execute(step1_args) + await tool.execute(step2_args) + + # Should have tracked both steps + assert len(tool.step_history) == 2 + assert tool.step_history[0]["step"] == "First step" + assert tool.step_history[1]["step"] == "Second step" + + +# Integration test +class TestPlannerToolIntegration: + """Integration tests for planner tool.""" + + def setup_method(self): + """Set up model context for integration tests.""" + from utils.model_context import ModelContext + + self.tool = PlannerTool() + self.tool._model_context = ModelContext("flash") # Test model + + @pytest.mark.asyncio + async def test_interactive_planning_flow(self): + """Test complete interactive planning flow.""" + arguments = { + "step": "Plan a complete system redesign", + "step_number": 1, + "total_steps": 5, + "next_step_required": True, + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.create_thread", return_value="test-flow-uuid"): + with patch("utils.conversation_memory.add_turn"): + result = await self.tool.execute(arguments) + + # Verify response structure + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + assert parsed_response["step_number"] == 1 + assert parsed_response["total_steps"] == 5 + assert parsed_response["continuation_id"] == "test-flow-uuid" + assert parsed_response["status"] == "planning_success" diff --git a/tests/test_server.py b/tests/test_server.py index d7b6b63..0ca352c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -27,10 +27,11 @@ class TestServerTools: assert "testgen" in tool_names assert "refactor" in tool_names assert "tracer" in tool_names + assert "planner" in tool_names assert "version" in 
tool_names - # Should have exactly 12 tools (including consensus, refactor, tracer, and listmodels) - assert len(tools) == 12 + # Should have exactly 13 tools (including consensus, refactor, tracer, listmodels, and planner) + assert len(tools) == 13 # Check descriptions are verbose for tool in tools: diff --git a/tools/__init__.py b/tools/__init__.py index 5dd9193..8a11b08 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -8,6 +8,7 @@ from .codereview import CodeReviewTool from .consensus import ConsensusTool from .debug import DebugIssueTool from .listmodels import ListModelsTool +from .planner import PlannerTool from .precommit import Precommit from .refactor import RefactorTool from .testgen import TestGenerationTool @@ -22,6 +23,7 @@ __all__ = [ "ChatTool", "ConsensusTool", "ListModelsTool", + "PlannerTool", "Precommit", "RefactorTool", "TestGenerationTool", diff --git a/tools/planner.py b/tools/planner.py new file mode 100644 index 0000000..4777d4a --- /dev/null +++ b/tools/planner.py @@ -0,0 +1,440 @@ +""" +Planner tool + +This tool helps you break down complex ideas, problems, or projects into multiple +manageable steps. It enables Claude to think through larger problems sequentially, creating +detailed action plans with clear dependencies and alternatives where applicable. 
+ +=== CONTINUATION FLOW LOGIC === + +The tool implements sophisticated continuation logic that enables multi-session planning: + +RULE 1: No continuation_id + step_number=1 +→ Creates NEW planning thread +→ NO previous context loaded +→ Returns continuation_id for future steps + +RULE 2: continuation_id provided + step_number=1 +→ Loads PREVIOUS COMPLETE PLAN as context +→ Starts NEW planning session with historical context +→ Claude sees summary of previous completed plan + +RULE 3: continuation_id provided + step_number>1 +→ NO previous context loaded (middle of current planning session) +→ Continues current planning without historical interference + +RULE 4: next_step_required=false (final step) +→ Stores COMPLETE PLAN summary in conversation memory +→ Returns continuation_id for future planning sessions + +=== CONCRETE EXAMPLE === + +FIRST PLANNING SESSION (Feature A): +Call 1: planner(step="Plan user authentication", step_number=1, total_steps=3, next_step_required=true) + → NEW thread created: "uuid-abc123" + → Response: {"step_number": 1, "continuation_id": "uuid-abc123"} + +Call 2: planner(step="Design login flow", step_number=2, total_steps=3, next_step_required=true, continuation_id="uuid-abc123") + → Middle of current plan - NO context loading + → Response: {"step_number": 2, "continuation_id": "uuid-abc123"} + +Call 3: planner(step="Security implementation", step_number=3, total_steps=3, next_step_required=FALSE, continuation_id="uuid-abc123") + → FINAL STEP: Stores "COMPLETE PLAN: Security implementation (3 steps completed)" + → Response: {"step_number": 3, "planning_complete": true, "continuation_id": "uuid-abc123"} + +LATER PLANNING SESSION (Feature B): +Call 1: planner(step="Plan dashboard system", step_number=1, total_steps=2, next_step_required=true, continuation_id="uuid-abc123") + → Loads previous complete plan as context + → Response includes: "=== PREVIOUS COMPLETE PLAN CONTEXT === Security implementation..." 
+ → Claude sees previous work and can build upon it + +Call 2: planner(step="Dashboard widgets", step_number=2, total_steps=2, next_step_required=FALSE, continuation_id="uuid-abc123") + → FINAL STEP: Stores new complete plan summary + → Both planning sessions now available for future continuations + +This enables Claude to say: "Continue planning feature C using the authentication and dashboard work" +and the tool will provide context from both previous completed planning sessions. +""" + +import json +import logging +from typing import TYPE_CHECKING, Any, Optional + +from pydantic import Field + +if TYPE_CHECKING: + from tools.models import ToolModelCategory + +from config import TEMPERATURE_BALANCED +from systemprompts import PLANNER_PROMPT + +from .base import BaseTool, ToolRequest + +logger = logging.getLogger(__name__) + +# Field descriptions to avoid duplication between Pydantic and JSON schema +PLANNER_FIELD_DESCRIPTIONS = { + # Interactive planning fields for step-by-step planning + "step": ( + "Your current planning step. For the first step, describe the task/problem to plan. " + "For subsequent steps, provide the actual planning step content. Can include: regular planning steps, " + "revisions of previous steps, questions about previous decisions, realizations about needing more analysis, " + "changes in approach, etc." 
+ ), + "step_number": "Current step number in the planning sequence (starts at 1)", + "total_steps": "Current estimate of total steps needed (can be adjusted up/down as planning progresses)", + "next_step_required": "Whether another planning step is required after this one", + "is_step_revision": "True if this step revises/replaces a previous step", + "revises_step_number": "If is_step_revision is true, which step number is being revised", + "is_branch_point": "True if this step branches from a previous step to explore alternatives", + "branch_from_step": "If is_branch_point is true, which step number is the branching point", + "branch_id": "Identifier for the current branch (e.g., 'approach-A', 'microservices-path')", + "more_steps_needed": "True if more steps are needed beyond the initial estimate", + "continuation_id": "Thread continuation ID for multi-turn planning sessions (useful for seeding new plans with prior context)", +} + + +class PlanStep: + """Represents a single step in the planning process.""" + + def __init__( + self, step_number: int, content: str, branch_id: Optional[str] = None, parent_step: Optional[int] = None + ): + self.step_number = step_number + self.content = content + self.branch_id = branch_id or "main" + self.parent_step = parent_step + self.children = [] + + +class PlannerRequest(ToolRequest): + """Request model for the planner tool - interactive step-by-step planning.""" + + # Required fields for each planning step + step: str = Field(..., description=PLANNER_FIELD_DESCRIPTIONS["step"]) + step_number: int = Field(..., description=PLANNER_FIELD_DESCRIPTIONS["step_number"]) + total_steps: int = Field(..., description=PLANNER_FIELD_DESCRIPTIONS["total_steps"]) + next_step_required: bool = Field(..., description=PLANNER_FIELD_DESCRIPTIONS["next_step_required"]) + + # Optional revision/branching fields + is_step_revision: Optional[bool] = Field(False, description=PLANNER_FIELD_DESCRIPTIONS["is_step_revision"]) + revises_step_number: 
Optional[int] = Field(None, description=PLANNER_FIELD_DESCRIPTIONS["revises_step_number"]) + is_branch_point: Optional[bool] = Field(False, description=PLANNER_FIELD_DESCRIPTIONS["is_branch_point"]) + branch_from_step: Optional[int] = Field(None, description=PLANNER_FIELD_DESCRIPTIONS["branch_from_step"]) + branch_id: Optional[str] = Field(None, description=PLANNER_FIELD_DESCRIPTIONS["branch_id"]) + more_steps_needed: Optional[bool] = Field(False, description=PLANNER_FIELD_DESCRIPTIONS["more_steps_needed"]) + + # Optional continuation field + continuation_id: Optional[str] = Field(None, description=PLANNER_FIELD_DESCRIPTIONS["continuation_id"]) + + # Override inherited fields to exclude them from schema + model: Optional[str] = Field(default=None, exclude=True) + temperature: Optional[float] = Field(default=None, exclude=True) + thinking_mode: Optional[str] = Field(default=None, exclude=True) + use_websearch: Optional[bool] = Field(default=None, exclude=True) + images: Optional[list] = Field(default=None, exclude=True) + + +class PlannerTool(BaseTool): + """Sequential planning tool with step-by-step breakdown and refinement.""" + + def __init__(self): + super().__init__() + self.step_history = [] + self.branches = {} + + def get_name(self) -> str: + return "planner" + + def get_description(self) -> str: + return ( + "INTERACTIVE SEQUENTIAL PLANNER - Break down complex tasks through step-by-step planning. 
" + "This tool enables you to think sequentially, building plans incrementally with the ability " + "to revise, branch, and adapt as understanding deepens.\n\n" + "How it works:\n" + "- Start with step 1: describe the task/problem to plan\n" + "- Continue with subsequent steps, building the plan piece by piece\n" + "- Adjust total_steps estimate as you progress\n" + "- Revise previous steps when new insights emerge\n" + "- Branch into alternative approaches when needed\n" + "- Add more steps even after reaching the initial estimate\n\n" + "Key features:\n" + "- Sequential thinking with full context awareness\n" + "- Branching for exploring alternative strategies\n" + "- Revision capabilities to update earlier decisions\n" + "- Dynamic step count adjustment\n\n" + "Perfect for: complex project planning, system design with unknowns, " + "migration strategies, architectural decisions, problem decomposition." + ) + + def get_input_schema(self) -> dict[str, Any]: + schema = { + "type": "object", + "properties": { + # Interactive planning fields + "step": { + "type": "string", + "description": PLANNER_FIELD_DESCRIPTIONS["step"], + }, + "step_number": { + "type": "integer", + "description": PLANNER_FIELD_DESCRIPTIONS["step_number"], + "minimum": 1, + }, + "total_steps": { + "type": "integer", + "description": PLANNER_FIELD_DESCRIPTIONS["total_steps"], + "minimum": 1, + }, + "next_step_required": { + "type": "boolean", + "description": PLANNER_FIELD_DESCRIPTIONS["next_step_required"], + }, + "is_step_revision": { + "type": "boolean", + "description": PLANNER_FIELD_DESCRIPTIONS["is_step_revision"], + }, + "revises_step_number": { + "type": "integer", + "description": PLANNER_FIELD_DESCRIPTIONS["revises_step_number"], + "minimum": 1, + }, + "is_branch_point": { + "type": "boolean", + "description": PLANNER_FIELD_DESCRIPTIONS["is_branch_point"], + }, + "branch_from_step": { + "type": "integer", + "description": PLANNER_FIELD_DESCRIPTIONS["branch_from_step"], + "minimum": 1, + 
}, + "branch_id": { + "type": "string", + "description": PLANNER_FIELD_DESCRIPTIONS["branch_id"], + }, + "more_steps_needed": { + "type": "boolean", + "description": PLANNER_FIELD_DESCRIPTIONS["more_steps_needed"], + }, + "continuation_id": { + "type": "string", + "description": PLANNER_FIELD_DESCRIPTIONS["continuation_id"], + }, + }, + # Required fields for interactive planning + "required": ["step", "step_number", "total_steps", "next_step_required"], + } + return schema + + def get_system_prompt(self) -> str: + return PLANNER_PROMPT + + def get_request_model(self): + return PlannerRequest + + def get_default_temperature(self) -> float: + return TEMPERATURE_BALANCED + + def get_model_category(self) -> "ToolModelCategory": + from tools.models import ToolModelCategory + + return ToolModelCategory.EXTENDED_REASONING # Planning benefits from deep thinking + + def get_default_thinking_mode(self) -> str: + return "high" # Default to high thinking for comprehensive planning + + async def execute(self, arguments: dict[str, Any]) -> list: + """ + Override execute to work like original TypeScript tool - no AI calls, just data processing. + + This method implements the core continuation logic that enables multi-session planning: + + CONTINUATION LOGIC: + 1. If no continuation_id + step_number=1: Create new planning thread + 2. If continuation_id + step_number=1: Load previous complete plan as context for NEW planning + 3. If continuation_id + step_number>1: Continue current plan (no context loading) + 4. 
If next_step_required=false: Mark complete and store plan summary for future use + + CONVERSATION MEMORY INTEGRATION: + - Each step is stored in conversation memory for cross-tool continuation + - Final steps store COMPLETE PLAN summaries that can be loaded as context + - Only step 1 with continuation_id loads previous context (new planning session) + - Steps 2+ with continuation_id continue current session without context interference + """ + from mcp.types import TextContent + + from utils.conversation_memory import add_turn, create_thread, get_thread + + try: + # Validate request like the original + request_model = self.get_request_model() + request = request_model(**arguments) + + # Process step like original TypeScript tool + if request.step_number > request.total_steps: + request.total_steps = request.step_number + + # === CONTINUATION LOGIC IMPLEMENTATION === + # This implements the 4 rules documented in the module docstring + + continuation_id = request.continuation_id + previous_plan_context = "" + + # RULE 1: No continuation_id + step_number=1 → Create NEW planning thread + if not continuation_id and request.step_number == 1: + # Filter arguments to only include serializable data for conversation memory + serializable_args = { + k: v + for k, v in arguments.items() + if not hasattr(v, "__class__") or v.__class__.__module__ != "utils.model_context" + } + continuation_id = create_thread("planner", serializable_args) + # Result: New thread created, no previous context, returns continuation_id + + # RULE 2: continuation_id + step_number=1 → Load PREVIOUS COMPLETE PLAN as context + elif continuation_id and request.step_number == 1: + thread = get_thread(continuation_id) + if thread: + # Search for most recent COMPLETE PLAN from previous planning sessions + for turn in reversed(thread.turns): # Newest first + if turn.tool_name == "planner" and turn.role == "assistant": + # Try to parse as JSON first (new format) + try: + turn_data = json.loads(turn.content) + 
if isinstance(turn_data, dict) and turn_data.get("planning_complete"): + # New JSON format + plan_summary = turn_data.get("plan_summary", "") + if plan_summary: + previous_plan_context = plan_summary[:500] + break + except (json.JSONDecodeError, ValueError): + # Fallback to old text format + if "planning_complete" in turn.content: + try: + if "COMPLETE PLAN:" in turn.content: + plan_start = turn.content.find("COMPLETE PLAN:") + previous_plan_context = turn.content[plan_start : plan_start + 500] + "..." + else: + previous_plan_context = turn.content[:300] + "..." + break + except Exception: + pass + + if previous_plan_context: + previous_plan_context = f"\\n\\n=== PREVIOUS COMPLETE PLAN CONTEXT ===\\n{previous_plan_context}\\n=== END CONTEXT ===\\n" + # Result: NEW planning session with previous complete plan as context + + # RULE 3: continuation_id + step_number>1 → Continue current plan (no context loading) + # This case is handled by doing nothing - we're in the middle of current planning + # Result: Current planning continues without historical interference + + step_data = { + "step": request.step, + "step_number": request.step_number, + "total_steps": request.total_steps, + "next_step_required": request.next_step_required, + "is_step_revision": request.is_step_revision, + "revises_step_number": request.revises_step_number, + "is_branch_point": request.is_branch_point, + "branch_from_step": request.branch_from_step, + "branch_id": request.branch_id, + "more_steps_needed": request.more_steps_needed, + "continuation_id": request.continuation_id, + } + + # Store in local history like original + self.step_history.append(step_data) + + # Handle branching like original + if request.is_branch_point and request.branch_from_step and request.branch_id: + if request.branch_id not in self.branches: + self.branches[request.branch_id] = [] + self.branches[request.branch_id].append(step_data) + + # Build structured JSON response like other tools (consensus, refactor) + 
response_data = { + "status": "planning_success", + "step_number": request.step_number, + "total_steps": request.total_steps, + "next_step_required": request.next_step_required, + "step_content": request.step, + "metadata": { + "branches": list(self.branches.keys()), + "step_history_length": len(self.step_history), + "is_step_revision": request.is_step_revision or False, + "revises_step_number": request.revises_step_number, + "is_branch_point": request.is_branch_point or False, + "branch_from_step": request.branch_from_step, + "branch_id": request.branch_id, + "more_steps_needed": request.more_steps_needed or False, + }, + "output": { + "instructions": "This is a structured planning response. Present the step_content as the main planning analysis. If next_step_required is true, continue with the next step. If planning_complete is true, present the complete plan in a well-structured format with clear sections, headings, numbered steps, and visual elements like ASCII charts for phases/dependencies. Use bullet points, sub-steps, sequences, and visual organization to make complex plans easy to understand and follow. IMPORTANT: Do NOT use emojis - use clear text formatting and ASCII characters only. Do NOT mention time estimates or costs unless explicitly requested.", + "format": "step_by_step_planning", + "presentation_guidelines": { + "completed_plans": "Use clear headings, numbered phases, ASCII diagrams for workflows/dependencies, bullet points for sub-tasks, and visual sequences where helpful. No emojis. No time/cost estimates unless requested.", + "step_content": "Present as main analysis with clear structure and actionable insights. No emojis. 
No time/cost estimates unless requested.", + "continuation": "Use continuation_id for related planning sessions or implementation planning", + }, + }, + } + + # Always include continuation_id if we have one (enables step chaining within session) + if continuation_id: + response_data["continuation_id"] = continuation_id + + # Add previous plan context if available + if previous_plan_context: + response_data["previous_plan_context"] = previous_plan_context.strip() + + # RULE 4: next_step_required=false → Mark complete and store plan summary + if not request.next_step_required: + response_data["planning_complete"] = True + response_data["plan_summary"] = ( + f"COMPLETE PLAN: {request.step} (Total {request.total_steps} steps completed)" + ) + response_data["next_steps"] = ( + "Planning complete. Present the complete plan to the user in a well-structured format with clear sections, " + "numbered steps, visual elements (ASCII charts/diagrams where helpful), sub-step breakdowns, and implementation guidance. " + "Use headings, bullet points, and visual organization to make the plan easy to follow. " + "If there are phases, dependencies, or parallel tracks, show these relationships visually. " + "IMPORTANT: Do NOT use emojis - use clear text formatting and ASCII characters only. " + "Do NOT mention time estimates or costs unless explicitly requested. " + "After presenting the plan, offer to either help implement specific parts or use the continuation_id to start related planning sessions." + ) + # Result: Planning marked complete, summary stored for future context loading + else: + response_data["planning_complete"] = False + remaining_steps = request.total_steps - request.step_number + response_data["next_steps"] = ( + f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining." 
+ ) + # Result: Intermediate step, planning continues + + # Convert to clean JSON response + response_content = json.dumps(response_data, indent=2) + + # Store this step in conversation memory + if continuation_id: + add_turn( + thread_id=continuation_id, + role="assistant", + content=response_content, + tool_name="planner", + model_name="claude-planner", + ) + + # Return the JSON response directly as text content, like consensus tool + return [TextContent(type="text", text=response_content)] + + except Exception as e: + # Error handling - return JSON directly like consensus tool + error_data = {"error": str(e), "status": "planning_failed"} + return [TextContent(type="text", text=json.dumps(error_data, indent=2))] + + # Stub implementations for abstract methods (not used since we override execute) + async def prepare_prompt(self, request: PlannerRequest) -> str: + return "" # Not used - execute() is overridden + + def format_response(self, response: str, request: PlannerRequest, model_info: dict = None) -> str: + return response # Not used - execute() is overridden