🚀 Major Enhancement: Workflow-Based Tool Architecture v5.5.0 (#95)

* WIP: new workflow architecture
* WIP: further improvements and cleanup
* WIP: cleanup and docs, replace old tool with new
* WIP: cleanup and docs, replace old tool with new
* WIP: new planner implementation using workflow
* WIP: precommit tool working as a workflow instead of a basic tool. Support for passing False to use_assistant_model to skip external models completely and use Claude only
* WIP: precommit workflow version swapped with old
* WIP: codereview
* WIP: replaced codereview
* WIP: replaced codereview
* WIP: replaced refactor
* WIP: workflow for thinkdeep
* WIP: ensure files get embedded correctly
* WIP: thinkdeep replaced with workflow version
* WIP: improved messaging when an external model's response is received
* WIP: analyze tool swapped
* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only
* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only
* WIP: fixed get_completion_next_steps_message missing param
* Fixed tests. Request for files consistently
* Fixed tests. Request for files consistently
* Fixed tests
* New testgen workflow tool. Updated docs
* Swap testgen workflow
* Fix CI test failures by excluding API-dependent tests
  - Update GitHub Actions workflow to exclude simulation tests that require API keys
  - Fix collaboration tests to properly mock workflow tool expert analysis calls
  - Update test assertions to handle new workflow tool response format
  - Ensure unit tests run without external API dependencies in CI

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>
* WIP - Update tests to match new tools
* WIP - Update tests to match new tools

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-21 00:08:11 +04:00
parent 4dae6e457e
commit 69a3121452
76 changed files with 17111 additions and 7725 deletions
--- a/simulator_tests/test_cross_tool_continuation.py
+++ b/simulator_tests/test_cross_tool_continuation.py
@@ -62,7 +62,7 @@ class CrossToolContinuationTest(ConversationBaseTest):
            self.logger.info("  1: Testing chat -> thinkdeep -> codereview")

            # Start with chat
-            chat_response, chat_id = self.call_mcp_tool_direct(
+            chat_response, chat_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
@@ -76,11 +76,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
                return False

            # Continue with thinkdeep
-            thinkdeep_response, _ = self.call_mcp_tool_direct(
+            thinkdeep_response, _ = self.call_mcp_tool(
                "thinkdeep",
                {
-                    "prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
-                    "files": [self.test_files["python"]],  # Same file should be deduplicated
+                    "step": "Think deeply about potential performance issues in this code. Please use low thinking mode.",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Building on previous chat analysis to examine performance issues",
+                    "relevant_files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": chat_id,
                    "model": "flash",
                },
@@ -91,11 +95,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
                return False

            # Continue with codereview
-            codereview_response, _ = self.call_mcp_tool_direct(
+            codereview_response, _ = self.call_mcp_tool(
                "codereview",
                {
-                    "files": [self.test_files["python"]],  # Same file should be deduplicated
-                    "prompt": "Building on our previous analysis, provide a comprehensive code review",
+                    "step": "Building on our previous analysis, provide a comprehensive code review",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Continuing from previous chat and thinkdeep analysis for comprehensive review",
+                    "relevant_files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": chat_id,
                    "model": "flash",
                },
@@ -118,11 +126,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
            self.logger.info("  2: Testing analyze -> debug -> thinkdeep")

            # Start with analyze
-            analyze_response, analyze_id = self.call_mcp_tool_direct(
+            analyze_response, analyze_id = self.call_mcp_tool(
                "analyze",
                {
-                    "files": [self.test_files["python"]],
-                    "prompt": "Analyze this code for quality and performance issues",
+                    "step": "Analyze this code for quality and performance issues",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Starting analysis of Python code for quality and performance issues",
+                    "relevant_files": [self.test_files["python"]],
                    "model": "flash",
                },
            )
@@ -132,11 +144,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
                return False

            # Continue with debug
-            debug_response, _ = self.call_mcp_tool_direct(
+            debug_response, _ = self.call_mcp_tool(
                "debug",
                {
-                    "files": [self.test_files["python"]],  # Same file should be deduplicated
-                    "prompt": "Based on our analysis, help debug the performance issue in fibonacci",
+                    "step": "Based on our analysis, help debug the performance issue in fibonacci",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Building on previous analysis to debug specific performance issue",
+                    "relevant_files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": analyze_id,
                    "model": "flash",
                },
@@ -147,11 +163,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
                return False

            # Continue with thinkdeep
-            final_response, _ = self.call_mcp_tool_direct(
+            final_response, _ = self.call_mcp_tool(
                "thinkdeep",
                {
-                    "prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
-                    "files": [self.test_files["python"]],  # Same file should be deduplicated
+                    "step": "Think deeply about the architectural implications of the issues we've found. Please use low thinking mode.",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Building on analysis and debug findings to explore architectural implications",
+                    "relevant_files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": analyze_id,
                    "model": "flash",
                },
@@ -174,7 +194,7 @@ class CrossToolContinuationTest(ConversationBaseTest):
            self.logger.info("  3: Testing multi-file cross-tool continuation")

            # Start with both files
-            multi_response, multi_id = self.call_mcp_tool_direct(
+            multi_response, multi_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
@@ -188,11 +208,15 @@ class CrossToolContinuationTest(ConversationBaseTest):
                return False

            # Switch to codereview with same files (should use conversation history)
-            multi_review, _ = self.call_mcp_tool_direct(
+            multi_review, _ = self.call_mcp_tool(
                "codereview",
                {
-                    "files": [self.test_files["python"], self.test_files["config"]],  # Same files
-                    "prompt": "Review both files in the context of our previous discussion",
+                    "step": "Review both files in the context of our previous discussion",
+                    "step_number": 1,
+                    "total_steps": 1,
+                    "next_step_required": False,
+                    "findings": "Continuing multi-file analysis with code review perspective",
+                    "relevant_files": [self.test_files["python"], self.test_files["config"]],  # Same files
                    "continuation_id": multi_id,
                    "model": "flash",
                },