From 4dae6e457ee05fb1042cd853d450c203e7985672 Mon Sep 17 00:00:00 2001 From: Fahad Date: Thu, 19 Jun 2025 18:21:43 +0400 Subject: [PATCH] Improved planner: thinks with depth and breadth --- config.py | 2 +- tests/test_planner.py | 46 +++++++++++++++++++++++-- tools/planner.py | 79 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 120 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index 216fdcb..385624b 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.2.3" +__version__ = "5.2.4" # Last update date in ISO format __updated__ = "2025-06-19" # Primary maintainer diff --git a/tests/test_planner.py b/tests/test_planner.py index 182bba6..1d11625 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -108,7 +108,11 @@ class TestPlannerTool: assert parsed_response["total_steps"] == 10 assert parsed_response["next_step_required"] is True assert parsed_response["continuation_id"] == "test-uuid-123" - assert parsed_response["status"] == "planning_success" + # For complex plans (>=5 steps) on first step, expect deep thinking pause + assert parsed_response["status"] == "pause_for_deep_thinking" + assert parsed_response["thinking_required"] is True + assert "required_thinking" in parsed_response + assert "MANDATORY: DO NOT call the planner tool again immediately" in parsed_response["next_steps"] @pytest.mark.asyncio async def test_execute_subsequent_step(self): @@ -139,7 +143,11 @@ class TestPlannerTool: assert parsed_response["total_steps"] == 8 assert parsed_response["next_step_required"] is True assert parsed_response["continuation_id"] == "existing-uuid-456" - assert parsed_response["status"] == "planning_success" + # For complex plans (>=5 steps) on step 2, expect deep thinking pause + assert parsed_response["status"] == "pause_for_deep_thinking" + assert parsed_response["thinking_required"] is True + assert "required_thinking" in parsed_response + assert "STOP! Complex planning requires reflection between steps" in parsed_response["next_steps"] @pytest.mark.asyncio async def test_execute_with_continuation_context(self): @@ -410,4 +418,38 @@ class TestPlannerToolIntegration: assert parsed_response["step_number"] == 1 assert parsed_response["total_steps"] == 5 assert parsed_response["continuation_id"] == "test-flow-uuid" + # For complex plans (>=5 steps) on first step, expect deep thinking pause + assert parsed_response["status"] == "pause_for_deep_thinking" + assert parsed_response["thinking_required"] is True + + @pytest.mark.asyncio + async def test_simple_planning_flow(self): + """Test simple planning flow without deep thinking pauses.""" + arguments = { + "step": "Plan a simple feature update", + "step_number": 1, + "total_steps": 3, # Simple plan < 5 steps + "next_step_required": True, + } + + # Mock conversation memory functions + with patch("utils.conversation_memory.create_thread", return_value="test-simple-uuid"): + with patch("utils.conversation_memory.add_turn"): + result = await self.tool.execute(arguments) + + # Verify response structure + assert len(result) == 1 + response_text = result[0].text + + # Parse the JSON response + import json + + parsed_response = json.loads(response_text) + + assert parsed_response["step_number"] == 1 + assert parsed_response["total_steps"] == 3 + assert parsed_response["continuation_id"] == "test-simple-uuid" + # For simple plans (< 5 steps), expect normal flow without deep thinking pause assert parsed_response["status"] == "planning_success" + assert "thinking_required" not in parsed_response + assert "Continue with step 2" in parsed_response["next_steps"] diff --git a/tools/planner.py b/tools/planner.py index 8e74e2d..0638d96 100644 --- a/tools/planner.py +++ b/tools/planner.py @@ -161,9 +161,13 @@ class PlannerTool(BaseTool): "- Add more steps even after reaching the initial estimate\n\n" "Key features:\n" "- Sequential thinking with full context awareness\n" + "- Forced deep reflection for complex plans (≥5 steps) in early stages\n" "- Branching for exploring alternative strategies\n" "- Revision capabilities to update earlier decisions\n" "- Dynamic step count adjustment\n\n" + "ENHANCED: For complex plans (≥5 steps), the first 3 steps enforce deep thinking pauses\n" + "to prevent surface-level planning and ensure thorough consideration of alternatives,\n" + "dependencies, and strategic decisions before moving to tactical details.\n\n" "Perfect for: complex project planning, system design with unknowns, " "migration strategies, architectural decisions, problem decomposition." ) @@ -417,10 +421,77 @@ class PlannerTool(BaseTool): else: response_data["planning_complete"] = False remaining_steps = request.total_steps - request.step_number - response_data["next_steps"] = ( - f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining." - ) - # Result: Intermediate step, planning continues + + # ENHANCED: Add deep thinking pauses for complex plans in early stages + # Only for complex plans (>=5 steps) and first 3 steps - force deep reflection + if request.total_steps >= 5 and request.step_number <= 3: + response_data["status"] = "pause_for_deep_thinking" + response_data["thinking_required"] = True + + if request.step_number == 1: + # Initial deep thinking - understand the full scope + response_data["required_thinking"] = [ + "Analyze the complete scope and complexity of what needs to be planned", + "Consider multiple approaches and their trade-offs", + "Identify key constraints, dependencies, and potential challenges", + "Think about stakeholders, success criteria, and critical requirements", + "Consider what could go wrong and how to mitigate risks early", + ] + response_data["next_steps"] = ( + f"MANDATORY: DO NOT call the planner tool again immediately. This is a complex plan ({request.total_steps} steps) " + f"that requires deep thinking. You MUST first spend time reflecting on the planning challenge:\n\n" + f"REQUIRED DEEP THINKING before calling planner step {request.step_number + 1}:\n" + f"1. Analyze the FULL SCOPE: What exactly needs to be accomplished?\n" + f"2. Consider MULTIPLE APPROACHES: What are 2-3 different ways to tackle this?\n" + f"3. Identify CONSTRAINTS & DEPENDENCIES: What limits our options?\n" + f"4. Think about SUCCESS CRITERIA: How will we know we've succeeded?\n" + f"5. Consider RISKS & MITIGATION: What could go wrong early vs late?\n\n" + f"Only call planner again with step_number: {request.step_number + 1} AFTER this deep analysis." + ) + elif request.step_number == 2: + # Refine approach - dig deeper into the chosen direction + response_data["required_thinking"] = [ + "Evaluate the approach from step 1 - are there better alternatives?", + "Break down the major phases and identify critical decision points", + "Consider resource requirements and potential bottlenecks", + "Think about how different parts interconnect and affect each other", + "Identify areas that need the most careful planning vs quick wins", + ] + response_data["next_steps"] = ( + f"STOP! Complex planning requires reflection between steps. DO NOT call planner immediately.\n\n" + f"MANDATORY REFLECTION before planner step {request.step_number + 1}:\n" + f"1. EVALUATE YOUR APPROACH: Is the direction from step 1 still the best?\n" + f"2. IDENTIFY MAJOR PHASES: What are the 3-5 main chunks of work?\n" + f"3. SPOT DEPENDENCIES: What must happen before what?\n" + f"4. CONSIDER RESOURCES: What skills, tools, or access do we need?\n" + f"5. FIND CRITICAL PATHS: Where could delays hurt the most?\n\n" + f"Think deeply about these aspects, then call planner with step_number: {request.step_number + 1}." + ) + elif request.step_number == 3: + # Final deep thinking - validate and prepare for execution planning + response_data["required_thinking"] = [ + "Validate that the emerging plan addresses the original requirements", + "Identify any gaps or assumptions that need clarification", + "Consider how to validate progress and adjust course if needed", + "Think about what the first concrete steps should be", + "Prepare for transition from strategic to tactical planning", + ] + response_data["next_steps"] = ( + f"PAUSE for final strategic reflection. DO NOT call planner yet.\n\n" + f"FINAL DEEP THINKING before planner step {request.step_number + 1}:\n" + f"1. VALIDATE COMPLETENESS: Does this plan address all original requirements?\n" + f"2. CHECK FOR GAPS: What assumptions need validation? What's unclear?\n" + f"3. PLAN FOR ADAPTATION: How will we know if we need to change course?\n" + f"4. DEFINE FIRST STEPS: What are the first 2-3 concrete actions?\n" + f"5. TRANSITION MINDSET: Ready to shift from strategic to tactical planning?\n\n" + f"After this reflection, call planner with step_number: {request.step_number + 1} to continue with tactical details." + ) + else: + # Normal flow for simple plans or later steps of complex plans + response_data["next_steps"] = ( + f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining." + ) + # Result: Intermediate step, planning continues (with optional deep thinking pause) # Convert to clean JSON response response_content = json.dumps(response_data, indent=2)