Improved planner: thinks with depth and breadth (forces deep-thinking pauses during the first 3 steps of complex plans with >=5 steps)

2025-06-19 18:21:43 +04:00
parent 81270e3712
commit 4dae6e457e
3 changed files with 120 additions and 7 deletions
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.2.3"
+__version__ = "5.2.4"
 # Last update date in ISO format
 __updated__ = "2025-06-19"
 # Primary maintainer
--- a/tests/test_planner.py
+++ b/tests/test_planner.py
@@ -108,7 +108,11 @@ class TestPlannerTool:
        assert parsed_response["total_steps"] == 10
        assert parsed_response["next_step_required"] is True
        assert parsed_response["continuation_id"] == "test-uuid-123"
-        assert parsed_response["status"] == "planning_success"
+        # For complex plans (>=5 steps) on first step, expect deep thinking pause
+        assert parsed_response["status"] == "pause_for_deep_thinking"
+        assert parsed_response["thinking_required"] is True
+        assert "required_thinking" in parsed_response
+        assert "MANDATORY: DO NOT call the planner tool again immediately" in parsed_response["next_steps"]

    @pytest.mark.asyncio
    async def test_execute_subsequent_step(self):
@@ -139,7 +143,11 @@ class TestPlannerTool:
        assert parsed_response["total_steps"] == 8
        assert parsed_response["next_step_required"] is True
        assert parsed_response["continuation_id"] == "existing-uuid-456"
-        assert parsed_response["status"] == "planning_success"
+        # For complex plans (>=5 steps) on step 2, expect deep thinking pause
+        assert parsed_response["status"] == "pause_for_deep_thinking"
+        assert parsed_response["thinking_required"] is True
+        assert "required_thinking" in parsed_response
+        assert "STOP! Complex planning requires reflection between steps" in parsed_response["next_steps"]

    @pytest.mark.asyncio
    async def test_execute_with_continuation_context(self):
@@ -410,4 +418,38 @@ class TestPlannerToolIntegration:
        assert parsed_response["step_number"] == 1
        assert parsed_response["total_steps"] == 5
        assert parsed_response["continuation_id"] == "test-flow-uuid"
+        # For complex plans (>=5 steps) on first step, expect deep thinking pause
+        assert parsed_response["status"] == "pause_for_deep_thinking"
+        assert parsed_response["thinking_required"] is True
+
+    @pytest.mark.asyncio
+    async def test_simple_planning_flow(self):
+        """Test simple planning flow without deep thinking pauses."""
+        arguments = {
+            "step": "Plan a simple feature update",
+            "step_number": 1,
+            "total_steps": 3,  # Simple plan < 5 steps
+            "next_step_required": True,
+        }
+
+        # Mock conversation memory functions
+        with patch("utils.conversation_memory.create_thread", return_value="test-simple-uuid"):
+            with patch("utils.conversation_memory.add_turn"):
+                result = await self.tool.execute(arguments)
+
+        # Verify response structure
+        assert len(result) == 1
+        response_text = result[0].text
+
+        # Parse the JSON response
+        import json
+
+        parsed_response = json.loads(response_text)
+
+        assert parsed_response["step_number"] == 1
+        assert parsed_response["total_steps"] == 3
+        assert parsed_response["continuation_id"] == "test-simple-uuid"
+        # For simple plans (< 5 steps), expect normal flow without deep thinking pause
        assert parsed_response["status"] == "planning_success"
+        assert "thinking_required" not in parsed_response
+        assert "Continue with step 2" in parsed_response["next_steps"]
--- a/tools/planner.py
+++ b/tools/planner.py
@@ -161,9 +161,13 @@ class PlannerTool(BaseTool):
            "- Add more steps even after reaching the initial estimate\n\n"
            "Key features:\n"
            "- Sequential thinking with full context awareness\n"
+            "- Forced deep reflection for complex plans (≥5 steps) in early stages\n"
            "- Branching for exploring alternative strategies\n"
            "- Revision capabilities to update earlier decisions\n"
            "- Dynamic step count adjustment\n\n"
+            "ENHANCED: For complex plans (≥5 steps), the first 3 steps enforce deep thinking pauses\n"
+            "to prevent surface-level planning and ensure thorough consideration of alternatives,\n"
+            "dependencies, and strategic decisions before moving to tactical details.\n\n"
            "Perfect for: complex project planning, system design with unknowns, "
            "migration strategies, architectural decisions, problem decomposition."
        )
@@ -417,10 +421,77 @@ class PlannerTool(BaseTool):
            else:
                response_data["planning_complete"] = False
                remaining_steps = request.total_steps - request.step_number
-                response_data["next_steps"] = (
-                    f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining."
-                )
-                # Result: Intermediate step, planning continues
+
+                # ENHANCED: Add deep thinking pauses for complex plans in early stages
+                # Only for complex plans (>=5 steps) and first 3 steps - force deep reflection
+                if request.total_steps >= 5 and request.step_number <= 3:
+                    response_data["status"] = "pause_for_deep_thinking"
+                    response_data["thinking_required"] = True
+
+                    if request.step_number == 1:
+                        # Initial deep thinking - understand the full scope
+                        response_data["required_thinking"] = [
+                            "Analyze the complete scope and complexity of what needs to be planned",
+                            "Consider multiple approaches and their trade-offs",
+                            "Identify key constraints, dependencies, and potential challenges",
+                            "Think about stakeholders, success criteria, and critical requirements",
+                            "Consider what could go wrong and how to mitigate risks early",
+                        ]
+                        response_data["next_steps"] = (
+                            f"MANDATORY: DO NOT call the planner tool again immediately. This is a complex plan ({request.total_steps} steps) "
+                            f"that requires deep thinking. You MUST first spend time reflecting on the planning challenge:\n\n"
+                            f"REQUIRED DEEP THINKING before calling planner step {request.step_number + 1}:\n"
+                            f"1. Analyze the FULL SCOPE: What exactly needs to be accomplished?\n"
+                            f"2. Consider MULTIPLE APPROACHES: What are 2-3 different ways to tackle this?\n"
+                            f"3. Identify CONSTRAINTS & DEPENDENCIES: What limits our options?\n"
+                            f"4. Think about SUCCESS CRITERIA: How will we know we've succeeded?\n"
+                            f"5. Consider RISKS & MITIGATION: What could go wrong early vs late?\n\n"
+                            f"Only call planner again with step_number: {request.step_number + 1} AFTER this deep analysis."
+                        )
+                    elif request.step_number == 2:
+                        # Refine approach - dig deeper into the chosen direction
+                        response_data["required_thinking"] = [
+                            "Evaluate the approach from step 1 - are there better alternatives?",
+                            "Break down the major phases and identify critical decision points",
+                            "Consider resource requirements and potential bottlenecks",
+                            "Think about how different parts interconnect and affect each other",
+                            "Identify areas that need the most careful planning vs quick wins",
+                        ]
+                        response_data["next_steps"] = (
+                            f"STOP! Complex planning requires reflection between steps. DO NOT call planner immediately.\n\n"
+                            f"MANDATORY REFLECTION before planner step {request.step_number + 1}:\n"
+                            f"1. EVALUATE YOUR APPROACH: Is the direction from step 1 still the best?\n"
+                            f"2. IDENTIFY MAJOR PHASES: What are the 3-5 main chunks of work?\n"
+                            f"3. SPOT DEPENDENCIES: What must happen before what?\n"
+                            f"4. CONSIDER RESOURCES: What skills, tools, or access do we need?\n"
+                            f"5. FIND CRITICAL PATHS: Where could delays hurt the most?\n\n"
+                            f"Think deeply about these aspects, then call planner with step_number: {request.step_number + 1}."
+                        )
+                    elif request.step_number == 3:
+                        # Final deep thinking - validate and prepare for execution planning
+                        response_data["required_thinking"] = [
+                            "Validate that the emerging plan addresses the original requirements",
+                            "Identify any gaps or assumptions that need clarification",
+                            "Consider how to validate progress and adjust course if needed",
+                            "Think about what the first concrete steps should be",
+                            "Prepare for transition from strategic to tactical planning",
+                        ]
+                        response_data["next_steps"] = (
+                            f"PAUSE for final strategic reflection. DO NOT call planner yet.\n\n"
+                            f"FINAL DEEP THINKING before planner step {request.step_number + 1}:\n"
+                            f"1. VALIDATE COMPLETENESS: Does this plan address all original requirements?\n"
+                            f"2. CHECK FOR GAPS: What assumptions need validation? What's unclear?\n"
+                            f"3. PLAN FOR ADAPTATION: How will we know if we need to change course?\n"
+                            f"4. DEFINE FIRST STEPS: What are the first 2-3 concrete actions?\n"
+                            f"5. TRANSITION MINDSET: Ready to shift from strategic to tactical planning?\n\n"
+                            f"After this reflection, call planner with step_number: {request.step_number + 1} to continue with tactical details."
+                        )
+                else:
+                    # Normal flow for simple plans or later steps of complex plans
+                    response_data["next_steps"] = (
+                        f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining."
+                    )
+                # Result: Intermediate step, planning continues (with optional deep thinking pause)

            # Convert to clean JSON response
            response_content = json.dumps(response_data, indent=2)