Improved planner: thinks with depth and breadth
This commit is contained in:
@@ -14,7 +14,7 @@ import os
|
||||
# These values are used in server responses and for tracking releases
|
||||
# IMPORTANT: This is the single source of truth for version and author info
|
||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||
__version__ = "5.2.3"
|
||||
__version__ = "5.2.4"
|
||||
# Last update date in ISO format
|
||||
__updated__ = "2025-06-19"
|
||||
# Primary maintainer
|
||||
|
||||
@@ -108,7 +108,11 @@ class TestPlannerTool:
|
||||
assert parsed_response["total_steps"] == 10
|
||||
assert parsed_response["next_step_required"] is True
|
||||
assert parsed_response["continuation_id"] == "test-uuid-123"
|
||||
assert parsed_response["status"] == "planning_success"
|
||||
# For complex plans (>=5 steps) on first step, expect deep thinking pause
|
||||
assert parsed_response["status"] == "pause_for_deep_thinking"
|
||||
assert parsed_response["thinking_required"] is True
|
||||
assert "required_thinking" in parsed_response
|
||||
assert "MANDATORY: DO NOT call the planner tool again immediately" in parsed_response["next_steps"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_subsequent_step(self):
|
||||
@@ -139,7 +143,11 @@ class TestPlannerTool:
|
||||
assert parsed_response["total_steps"] == 8
|
||||
assert parsed_response["next_step_required"] is True
|
||||
assert parsed_response["continuation_id"] == "existing-uuid-456"
|
||||
assert parsed_response["status"] == "planning_success"
|
||||
# For complex plans (>=5 steps) on step 2, expect deep thinking pause
|
||||
assert parsed_response["status"] == "pause_for_deep_thinking"
|
||||
assert parsed_response["thinking_required"] is True
|
||||
assert "required_thinking" in parsed_response
|
||||
assert "STOP! Complex planning requires reflection between steps" in parsed_response["next_steps"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_with_continuation_context(self):
|
||||
@@ -410,4 +418,38 @@ class TestPlannerToolIntegration:
|
||||
assert parsed_response["step_number"] == 1
|
||||
assert parsed_response["total_steps"] == 5
|
||||
assert parsed_response["continuation_id"] == "test-flow-uuid"
|
||||
# For complex plans (>=5 steps) on first step, expect deep thinking pause
|
||||
assert parsed_response["status"] == "pause_for_deep_thinking"
|
||||
assert parsed_response["thinking_required"] is True
|
||||
|
||||
@pytest.mark.asyncio
async def test_simple_planning_flow(self):
    """Verify that a short plan takes the normal path with no deep-thinking pause.

    Submits step 1 of a 3-step plan (under the 5-step mark this test treats
    as "complex") and checks that the JSON reply reports plain
    ``planning_success``, carries no ``thinking_required`` flag, and tells
    the caller to continue with step 2.
    """
    import json

    request_args = {
        "step": "Plan a simple feature update",
        "step_number": 1,
        "total_steps": 3,  # deliberately fewer than 5 steps: a "simple" plan
        "next_step_required": True,
    }

    # Stand-ins for the conversation memory helpers, so thread creation
    # yields a known id and turn recording uses a mock instead of the real helper.
    thread_stub = patch("utils.conversation_memory.create_thread", return_value="test-simple-uuid")
    turn_stub = patch("utils.conversation_memory.add_turn")
    with thread_stub, turn_stub:
        outputs = await self.tool.execute(arguments=request_args) if False else await self.tool.execute(request_args)

    # Exactly one content item is expected; its text holds the JSON payload.
    assert len(outputs) == 1
    payload = json.loads(outputs[0].text)

    # Fields that must echo the request or reflect the normal (non-paused) flow.
    expected_fields = {
        "step_number": 1,
        "total_steps": 3,
        "continuation_id": "test-simple-uuid",
        "status": "planning_success",
    }
    for field_name, expected_value in expected_fields.items():
        assert payload[field_name] == expected_value

    # Simple plans (< 5 steps) must not carry the deep-thinking marker.
    assert "thinking_required" not in payload
    assert "Continue with step 2" in payload["next_steps"]
|
||||
|
||||
@@ -161,9 +161,13 @@ class PlannerTool(BaseTool):
|
||||
"- Add more steps even after reaching the initial estimate\n\n"
|
||||
"Key features:\n"
|
||||
"- Sequential thinking with full context awareness\n"
|
||||
"- Forced deep reflection for complex plans (≥5 steps) in early stages\n"
|
||||
"- Branching for exploring alternative strategies\n"
|
||||
"- Revision capabilities to update earlier decisions\n"
|
||||
"- Dynamic step count adjustment\n\n"
|
||||
"ENHANCED: For complex plans (≥5 steps), the first 3 steps enforce deep thinking pauses\n"
|
||||
"to prevent surface-level planning and ensure thorough consideration of alternatives,\n"
|
||||
"dependencies, and strategic decisions before moving to tactical details.\n\n"
|
||||
"Perfect for: complex project planning, system design with unknowns, "
|
||||
"migration strategies, architectural decisions, problem decomposition."
|
||||
)
|
||||
@@ -417,10 +421,77 @@ class PlannerTool(BaseTool):
|
||||
else:
|
||||
response_data["planning_complete"] = False
|
||||
remaining_steps = request.total_steps - request.step_number
|
||||
response_data["next_steps"] = (
|
||||
f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining."
|
||||
)
|
||||
# Result: Intermediate step, planning continues
|
||||
|
||||
# ENHANCED: Add deep thinking pauses for complex plans in early stages
|
||||
# Only for complex plans (>=5 steps) and first 3 steps - force deep reflection
|
||||
if request.total_steps >= 5 and request.step_number <= 3:
|
||||
response_data["status"] = "pause_for_deep_thinking"
|
||||
response_data["thinking_required"] = True
|
||||
|
||||
if request.step_number == 1:
|
||||
# Initial deep thinking - understand the full scope
|
||||
response_data["required_thinking"] = [
|
||||
"Analyze the complete scope and complexity of what needs to be planned",
|
||||
"Consider multiple approaches and their trade-offs",
|
||||
"Identify key constraints, dependencies, and potential challenges",
|
||||
"Think about stakeholders, success criteria, and critical requirements",
|
||||
"Consider what could go wrong and how to mitigate risks early",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"MANDATORY: DO NOT call the planner tool again immediately. This is a complex plan ({request.total_steps} steps) "
|
||||
f"that requires deep thinking. You MUST first spend time reflecting on the planning challenge:\n\n"
|
||||
f"REQUIRED DEEP THINKING before calling planner step {request.step_number + 1}:\n"
|
||||
f"1. Analyze the FULL SCOPE: What exactly needs to be accomplished?\n"
|
||||
f"2. Consider MULTIPLE APPROACHES: What are 2-3 different ways to tackle this?\n"
|
||||
f"3. Identify CONSTRAINTS & DEPENDENCIES: What limits our options?\n"
|
||||
f"4. Think about SUCCESS CRITERIA: How will we know we've succeeded?\n"
|
||||
f"5. Consider RISKS & MITIGATION: What could go wrong early vs late?\n\n"
|
||||
f"Only call planner again with step_number: {request.step_number + 1} AFTER this deep analysis."
|
||||
)
|
||||
elif request.step_number == 2:
|
||||
# Refine approach - dig deeper into the chosen direction
|
||||
response_data["required_thinking"] = [
|
||||
"Evaluate the approach from step 1 - are there better alternatives?",
|
||||
"Break down the major phases and identify critical decision points",
|
||||
"Consider resource requirements and potential bottlenecks",
|
||||
"Think about how different parts interconnect and affect each other",
|
||||
"Identify areas that need the most careful planning vs quick wins",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"STOP! Complex planning requires reflection between steps. DO NOT call planner immediately.\n\n"
|
||||
f"MANDATORY REFLECTION before planner step {request.step_number + 1}:\n"
|
||||
f"1. EVALUATE YOUR APPROACH: Is the direction from step 1 still the best?\n"
|
||||
f"2. IDENTIFY MAJOR PHASES: What are the 3-5 main chunks of work?\n"
|
||||
f"3. SPOT DEPENDENCIES: What must happen before what?\n"
|
||||
f"4. CONSIDER RESOURCES: What skills, tools, or access do we need?\n"
|
||||
f"5. FIND CRITICAL PATHS: Where could delays hurt the most?\n\n"
|
||||
f"Think deeply about these aspects, then call planner with step_number: {request.step_number + 1}."
|
||||
)
|
||||
elif request.step_number == 3:
|
||||
# Final deep thinking - validate and prepare for execution planning
|
||||
response_data["required_thinking"] = [
|
||||
"Validate that the emerging plan addresses the original requirements",
|
||||
"Identify any gaps or assumptions that need clarification",
|
||||
"Consider how to validate progress and adjust course if needed",
|
||||
"Think about what the first concrete steps should be",
|
||||
"Prepare for transition from strategic to tactical planning",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"PAUSE for final strategic reflection. DO NOT call planner yet.\n\n"
|
||||
f"FINAL DEEP THINKING before planner step {request.step_number + 1}:\n"
|
||||
f"1. VALIDATE COMPLETENESS: Does this plan address all original requirements?\n"
|
||||
f"2. CHECK FOR GAPS: What assumptions need validation? What's unclear?\n"
|
||||
f"3. PLAN FOR ADAPTATION: How will we know if we need to change course?\n"
|
||||
f"4. DEFINE FIRST STEPS: What are the first 2-3 concrete actions?\n"
|
||||
f"5. TRANSITION MINDSET: Ready to shift from strategic to tactical planning?\n\n"
|
||||
f"After this reflection, call planner with step_number: {request.step_number + 1} to continue with tactical details."
|
||||
)
|
||||
else:
|
||||
# Normal flow for simple plans or later steps of complex plans
|
||||
response_data["next_steps"] = (
|
||||
f"Continue with step {request.step_number + 1}. Approximately {remaining_steps} steps remaining."
|
||||
)
|
||||
# Result: Intermediate step, planning continues (with optional deep thinking pause)
|
||||
|
||||
# Convert to clean JSON response
|
||||
response_content = json.dumps(response_data, indent=2)
|
||||
|
||||
Reference in New Issue
Block a user