#!/usr/bin/env python3
"""
PlannerWorkflow Tool Validation Test

Tests the planner tool's capabilities using the new workflow architecture.
This validates that the new workflow-based implementation maintains all the
functionality of the original planner tool while using the workflow pattern
like the debug tool.
"""

import json
from typing import Optional

from .conversation_base_test import ConversationBaseTest
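
# NOTE (illustrative sketch, not produced by this test): a rough shape of the
# intermediate-step payload the assertions below check for. Field names are taken
# from those assertions; the concrete "planner_status" key name and any extra
# fields are assumptions -- the tests only require some key ending in "_status".
#
#   {
#       "status": "pause_for_planner",
#       "step_number": 1,
#       "total_steps": 4,
#       "next_step_required": True,
#       "step_content": "...",
#       "next_steps": "...",
#       "continuation_id": "...",
#       "required_actions": ["..."],
#       "planner_status": {"step_history_length": 1},
#   }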


class PlannerValidationTest(ConversationBaseTest):
    """Test planner tool with new workflow architecture"""

    @property
    def test_name(self) -> str:
        return "planner_validation"

    @property
    def test_description(self) -> str:
        return "PlannerWorkflow tool validation with new workflow architecture"

    def run_test(self) -> bool:
        """Test planner tool capabilities"""
        # Set up the test environment
        self.setUp()

        try:
            self.logger.info("Test: PlannerWorkflow tool validation (new architecture)")

            # Test 1: Single planning session with workflow architecture
            if not self._test_single_planning_session():
                return False

            # Test 2: Planning with continuation using workflow
            if not self._test_planning_with_continuation():
                return False

            # Test 3: Complex plan with deep thinking pauses
            if not self._test_complex_plan_deep_thinking():
                return False

            # Test 4: Self-contained completion (no expert analysis)
            if not self._test_self_contained_completion():
                return False

            # Test 5: Branching and revision with workflow
            if not self._test_branching_and_revision():
                return False

            # Test 6: Workflow file context behavior
            if not self._test_workflow_file_context():
                return False

            self.logger.info(" ✅ All planner validation tests passed")
            return True

        except Exception as e:
            self.logger.error(f"PlannerWorkflow validation test failed: {e}")
            return False

    def _test_single_planning_session(self) -> bool:
        """Test a complete planning session with workflow architecture"""
        try:
            self.logger.info(" 1.1: Testing single planning session with workflow")

            # Step 1: Start planning
            self.logger.info(" 1.1.1: Step 1 - Initial planning step")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "I need to plan a comprehensive API redesign for our legacy system. Let me start by analyzing the current state and identifying key requirements for the new API architecture.",
                    "step_number": 1,
                    "total_steps": 4,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to get initial planning response")
                return False

            # Parse and validate JSON response
            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Validate step 1 response structure - expect pause_for_planner for next_step_required=True
            if not self._validate_step_response(response1_data, 1, 4, True, "pause_for_planner"):
                return False

            # Debug: Log the actual response structure to see what we're getting
            self.logger.debug(f"Response structure: {list(response1_data.keys())}")

            # Check workflow-specific response structure (more flexible)
            status_key = None
            for key in response1_data.keys():
                if key.endswith("_status"):
                    status_key = key
                    break

            if not status_key:
                self.logger.error(f"Missing workflow status field in response: {list(response1_data.keys())}")
                return False

            self.logger.debug(f"Found status field: {status_key}")

            # Check required_actions for workflow guidance
            if not response1_data.get("required_actions"):
                self.logger.error("Missing required_actions in workflow response")
                return False

            self.logger.info(f" ✅ Step 1 successful with workflow, continuation_id: {continuation_id}")

            # Step 2: Continue planning
            self.logger.info(" 1.1.2: Step 2 - API domain analysis")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "After analyzing the current API, I can identify three main domains: User Management, Content Management, and Analytics. Let me design the new API structure with RESTful endpoints and proper versioning.",
                    "step_number": 2,
                    "total_steps": 4,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to continue planning to step 2")
                return False

            response2_data = self._parse_planner_response(response2)
            if not self._validate_step_response(response2_data, 2, 4, True, "pause_for_planner"):
                return False

            # Check step history tracking in workflow (more flexible)
            status_key = None
            for key in response2_data.keys():
                if key.endswith("_status"):
                    status_key = key
                    break

            if status_key:
                workflow_status = response2_data.get(status_key, {})
                step_history_length = workflow_status.get("step_history_length", 0)
                if step_history_length < 2:
                    self.logger.error(f"Step history not properly tracked in workflow: {step_history_length}")
                    return False
                self.logger.debug(f"Step history length: {step_history_length}")
            else:
                self.logger.warning("No workflow status found, skipping step history check")

            self.logger.info(" ✅ Step 2 successful with workflow tracking")

            # Step 3: Final step - should trigger completion
            self.logger.info(" 1.1.3: Step 3 - Final planning step")
            response3, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "API redesign plan complete: Phase 1 - User Management API, Phase 2 - Content Management API, Phase 3 - Analytics API. Each phase includes proper authentication, rate limiting, and comprehensive documentation.",
                    "step_number": 3,
                    "total_steps": 3,  # Adjusted total
                    "next_step_required": False,  # Final step - should complete without expert analysis
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response3:
                self.logger.error("Failed to complete planning session")
                return False

            response3_data = self._parse_planner_response(response3)
            if not response3_data:
                return False

            # Validate final response structure - should be self-contained completion
            if response3_data.get("status") != "planner_complete":
                self.logger.error(f"Expected status 'planner_complete', got '{response3_data.get('status')}'")
                return False

            if not response3_data.get("planning_complete"):
                self.logger.error("Expected planning_complete=true for final step")
                return False

            # Should NOT have expert_analysis (self-contained)
            if "expert_analysis" in response3_data:
                self.logger.error("PlannerWorkflow should be self-contained without expert analysis")
                return False

            # Check plan_summary exists
            if not response3_data.get("plan_summary"):
                self.logger.error("Missing plan_summary in final step")
                return False

            self.logger.info(" ✅ Planning session completed successfully with workflow architecture")

            # Store continuation_id for next test
            self.api_continuation_id = continuation_id
            return True

        except Exception as e:
            self.logger.error(f"Single planning session test failed: {e}")
            return False
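
    # Illustrative sketch (assumption, based only on the completion assertions in
    # this test class) of the final-step payload expected when next_step_required
    # is False; values are placeholders:
    #
    #   {
    #       "status": "planner_complete",
    #       "planning_complete": True,
    #       "plan_summary": "...",
    #       "output": {"instructions": "..."},
    #       # and no "expert_analysis" key -- the planner workflow is self-contained
    #   }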

    def _test_planning_with_continuation(self) -> bool:
        """Test planning continuation with workflow architecture"""
        try:
            self.logger.info(" 1.2: Testing planning continuation with workflow")

            # Use continuation from previous test if available
            continuation_id = getattr(self, "api_continuation_id", None)
            if not continuation_id:
                # Start fresh if no continuation available
                self.logger.info(" 1.2.0: Starting fresh planning session")
                response0, continuation_id = self.call_mcp_tool(
                    "planner",
                    {
                        "step": "Planning API security strategy",
                        "step_number": 1,
                        "total_steps": 2,
                        "next_step_required": True,
                        "model": "flash",
                    },
                )
                if not response0 or not continuation_id:
                    self.logger.error("Failed to start fresh planning session")
                    return False

            # Test continuation step
            self.logger.info(" 1.2.1: Continue planning session")
            response1, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Building on the API redesign, let me now plan the security implementation with OAuth 2.0, API keys, and rate limiting strategies.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response1:
                self.logger.error("Failed to continue planning")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Validate continuation behavior
            if not self._validate_step_response(response1_data, 2, 2, True, "pause_for_planner"):
                return False

            # Check that continuation_id is preserved
            if response1_data.get("continuation_id") != continuation_id:
                self.logger.error("Continuation ID not preserved in workflow")
                return False

            self.logger.info(" ✅ Planning continuation working with workflow")
            return True

        except Exception as e:
            self.logger.error(f"Planning continuation test failed: {e}")
            return False

    def _test_complex_plan_deep_thinking(self) -> bool:
        """Test complex plan with deep thinking pauses"""
        try:
            self.logger.info(" 1.3: Testing complex plan with deep thinking pauses")

            # Start complex plan (≥5 steps) - should trigger deep thinking
            self.logger.info(" 1.3.1: Step 1 of complex plan (should trigger deep thinking)")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "I need to plan a complete digital transformation for our enterprise organization, including cloud migration, process automation, and cultural change management.",
                    "step_number": 1,
                    "total_steps": 8,  # Complex plan ≥5 steps
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start complex planning")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Should trigger deep thinking pause for complex plan
            if response1_data.get("status") != "pause_for_deep_thinking":
                self.logger.error("Expected deep thinking pause for complex plan step 1")
                return False

            if not response1_data.get("thinking_required"):
                self.logger.error("Expected thinking_required=true for complex plan")
                return False

            # Check required thinking actions
            required_thinking = response1_data.get("required_thinking", [])
            if len(required_thinking) < 4:
                self.logger.error("Expected comprehensive thinking requirements for complex plan")
                return False

            # Check for deep thinking guidance in next_steps
            next_steps = response1_data.get("next_steps", "")
            if "MANDATORY" not in next_steps or "deep thinking" not in next_steps.lower():
                self.logger.error("Expected mandatory deep thinking guidance")
                return False

            self.logger.info(" ✅ Complex plan step 1 correctly triggered deep thinking pause")

            # Step 2 of complex plan - should also trigger deep thinking
            self.logger.info(" 1.3.2: Step 2 of complex plan (should trigger deep thinking)")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "After deep analysis, I can see this transformation requires three parallel tracks: Technical Infrastructure, Business Process, and Human Capital. Let me design the coordination strategy.",
                    "step_number": 2,
                    "total_steps": 8,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to continue complex planning")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Step 2 should also trigger deep thinking for complex plans
            if response2_data.get("status") != "pause_for_deep_thinking":
                self.logger.error("Expected deep thinking pause for complex plan step 2")
                return False

            self.logger.info(" ✅ Complex plan step 2 correctly triggered deep thinking pause")

            # Step 4 of complex plan - should use normal flow (after step 3)
            self.logger.info(" 1.3.3: Step 4 of complex plan (should use normal flow)")
            response4, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Now moving to tactical planning: Phase 1 execution details with specific timelines and resource allocation for the technical infrastructure track.",
                    "step_number": 4,
                    "total_steps": 8,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response4:
                self.logger.error("Failed to continue to step 4")
                return False

            response4_data = self._parse_planner_response(response4)
            if not response4_data:
                return False

            # Step 4 should use normal flow (no more deep thinking pauses)
            if response4_data.get("status") != "pause_for_planner":
                self.logger.error("Expected normal planning flow for step 4")
                return False

            if response4_data.get("thinking_required"):
                self.logger.error("Step 4 should not require special thinking pause")
                return False

            self.logger.info(" ✅ Complex plan transitions to normal flow after step 3")
            return True

        except Exception as e:
            self.logger.error(f"Complex plan deep thinking test failed: {e}")
            return False
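
    # Illustrative sketch (assumption) of the deep-thinking pause payload asserted
    # above for complex plans (total_steps >= 5, early steps). Field names mirror
    # the assertions in this method; values are placeholders:
    #
    #   {
    #       "status": "pause_for_deep_thinking",
    #       "thinking_required": True,
    #       "required_thinking": ["...", "...", "...", "..."],  # at least 4 entries
    #       "next_steps": "MANDATORY: ... deep thinking ...",
    #   }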

    def _test_self_contained_completion(self) -> bool:
        """Test self-contained completion without expert analysis"""
        try:
            self.logger.info(" 1.4: Testing self-contained completion")

            # Simple planning session that should complete without expert analysis
            self.logger.info(" 1.4.1: Simple planning session")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning a simple website redesign with new color scheme and improved navigation.",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start simple planning")
                return False

            # Final step - should complete without expert analysis
            self.logger.info(" 1.4.2: Final step - self-contained completion")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Website redesign plan complete: Phase 1 - Update color palette and typography, Phase 2 - Redesign navigation structure and user flows.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": False,  # Final step
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to complete simple planning")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Validate self-contained completion
            if response2_data.get("status") != "planner_complete":
                self.logger.error("Expected self-contained completion status")
                return False

            # Should NOT call expert analysis
            if "expert_analysis" in response2_data:
                self.logger.error("PlannerWorkflow should not call expert analysis")
                return False

            # Should have planning_complete flag
            if not response2_data.get("planning_complete"):
                self.logger.error("Expected planning_complete=true")
                return False

            # Should have plan_summary
            if not response2_data.get("plan_summary"):
                self.logger.error("Expected plan_summary in completion")
                return False

            # Check completion instructions
            output = response2_data.get("output", {})
            if not output.get("instructions"):
                self.logger.error("Missing output instructions for plan presentation")
                return False

            self.logger.info(" ✅ Self-contained completion working correctly")
            return True

        except Exception as e:
            self.logger.error(f"Self-contained completion test failed: {e}")
            return False

    def _test_branching_and_revision(self) -> bool:
        """Test branching and revision with workflow architecture"""
        try:
            self.logger.info(" 1.5: Testing branching and revision with workflow")

            # Start planning session for branching test
            self.logger.info(" 1.5.1: Start planning for branching test")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning mobile app development strategy with different technology options to evaluate.",
                    "step_number": 1,
                    "total_steps": 4,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start branching test")
                return False

            # Create branch
            self.logger.info(" 1.5.2: Create branch for React Native approach")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Branch A: React Native approach - cross-platform development with shared codebase, faster development cycle, and consistent UI across platforms.",
                    "step_number": 2,
                    "total_steps": 4,
                    "next_step_required": True,
                    "is_branch_point": True,
                    "branch_from_step": 1,
                    "branch_id": "react-native",
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to create branch")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Validate branching in workflow
            metadata = response2_data.get("metadata", {})
            if not metadata.get("is_branch_point"):
                self.logger.error("Branch point not recorded in workflow")
                return False

            if metadata.get("branch_id") != "react-native":
                self.logger.error("Branch ID not properly recorded")
                return False

            if "react-native" not in metadata.get("branches", []):
                self.logger.error("Branch not added to branches list")
                return False

            self.logger.info(" ✅ Branching working with workflow architecture")

            # Test revision
            self.logger.info(" 1.5.3: Test revision capability")
            response3, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Revision of step 2: After consideration, let me revise the React Native approach to include performance optimizations and native module integration for critical features.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
                    "is_step_revision": True,
                    "revises_step_number": 2,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response3:
                self.logger.error("Failed to create revision")
                return False

            response3_data = self._parse_planner_response(response3)
            if not response3_data:
                return False

            # Validate revision in workflow
            metadata = response3_data.get("metadata", {})
            if not metadata.get("is_step_revision"):
                self.logger.error("Step revision not recorded in workflow")
                return False

            if metadata.get("revises_step_number") != 2:
                self.logger.error("Revised step number not properly recorded")
                return False

            self.logger.info(" ✅ Revision working with workflow architecture")
            return True

        except Exception as e:
            self.logger.error(f"Branching and revision test failed: {e}")
            return False
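
    # Illustrative sketch (assumption) of the "metadata" block checked above for a
    # branch step; a revision step is expected to carry "is_step_revision" and
    # "revises_step_number" instead:
    #
    #   "metadata": {
    #       "is_branch_point": True,
    #       "branch_id": "react-native",
    #       "branches": ["react-native"],
    #   }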

    def _test_workflow_file_context(self) -> bool:
        """Test workflow file context behavior (should be minimal for planner)"""
        try:
            self.logger.info(" 1.6: Testing workflow file context behavior")

            # Planner typically doesn't use files, but verify that the workflow handles this correctly
            self.logger.info(" 1.6.1: Planning step with no files (normal case)")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning data architecture for analytics platform.",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start workflow file context test")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Planner workflow should not have file_context since it doesn't use files
            if "file_context" in response1_data:
                self.logger.info(" ℹ️ Workflow file context present but should be minimal for planner")

            # Final step
            self.logger.info(" 1.6.2: Final step (should complete without file embedding)")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Data architecture plan complete with data lakes, processing pipelines, and analytics layers.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": False,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to complete workflow file context test")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Final step should complete self-contained
            if response2_data.get("status") != "planner_complete":
                self.logger.error("Expected self-contained completion for planner workflow")
                return False

            self.logger.info(" ✅ Workflow file context behavior appropriate for planner")
            return True

        except Exception as e:
            self.logger.error(f"Workflow file context test failed: {e}")
            return False

    def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
        """Call an MCP tool in-process - override for planner-specific response handling"""
        # Use in-process implementation to maintain conversation memory
        response_text, _ = self.call_mcp_tool_direct(tool_name, params)

        if not response_text:
            return None, None

        # Extract continuation_id from planner response specifically
        continuation_id = self._extract_planner_continuation_id(response_text)

        return response_text, continuation_id

    def _extract_planner_continuation_id(self, response_text: str) -> Optional[str]:
        """Extract continuation_id from planner response"""
        try:
            # Parse the response
            response_data = json.loads(response_text)
            return response_data.get("continuation_id")

        except json.JSONDecodeError as e:
            self.logger.debug(f"Failed to parse response for planner continuation_id: {e}")
            return None

    def _parse_planner_response(self, response_text: str) -> dict:
        """Parse planner tool JSON response"""
        try:
            # Parse the response - it should be direct JSON
            return json.loads(response_text)

        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse planner response as JSON: {e}")
            self.logger.error(f"Response text: {response_text[:500]}...")
            return {}

    def _validate_step_response(
        self,
        response_data: dict,
        expected_step: int,
        expected_total: int,
        expected_next_required: bool,
        expected_status: str,
    ) -> bool:
        """Validate a planner step response structure"""
        try:
            # Check status
            if response_data.get("status") != expected_status:
                self.logger.error(f"Expected status '{expected_status}', got '{response_data.get('status')}'")
                return False

            # Check step number
            if response_data.get("step_number") != expected_step:
                self.logger.error(f"Expected step_number {expected_step}, got {response_data.get('step_number')}")
                return False

            # Check total steps
            if response_data.get("total_steps") != expected_total:
                self.logger.error(f"Expected total_steps {expected_total}, got {response_data.get('total_steps')}")
                return False

            # Check next_step_required
            if response_data.get("next_step_required") != expected_next_required:
                self.logger.error(
                    f"Expected next_step_required {expected_next_required}, got {response_data.get('next_step_required')}"
                )
                return False

            # Check step_content exists
            if not response_data.get("step_content"):
                self.logger.error("Missing step_content in response")
                return False

            # Check next_steps guidance
            if not response_data.get("next_steps"):
                self.logger.error("Missing next_steps guidance in response")
                return False

            return True

        except Exception as e:
            self.logger.error(f"Error validating step response: {e}")
            return False