🚀 Major Enhancement: Workflow-Based Tool Architecture v5.5.0 (#95)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool now works as a workflow instead of a basic tool
Adds support for passing use_assistant_model=False to skip external models entirely and use Claude only
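
A minimal sketch of how a caller might use that flag; "use_assistant_model" is the option described above, while the function name, the step text, and the remaining fields only mirror the workflow-step shape seen in the tests and are illustrative:

```python
# Hypothetical sketch: run the precommit workflow without any external model.
# "use_assistant_model" is the flag added in this change; everything else here
# (function name, step text) is illustrative, not the tool's confirmed schema.
async def run_precommit_claude_only(tool):
    arguments = {
        "step": "Validate the staged changes before committing",
        "step_number": 1,
        "total_steps": 1,
        "next_step_required": False,
        "use_assistant_model": False,  # skip external models entirely; Claude handles it alone
    }
    return await tool.execute(arguments)
```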

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed missing parameter in get_completion_next_steps_message

* Fixed tests
Request files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls (see the sketch after this list)
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI
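
A hedged sketch of the mocking approach mentioned for the collaboration tests; the hook name "_call_expert_analysis" and the return payload are assumptions for illustration, not the tool's confirmed interface:

```python
# Hypothetical sketch: stub out a workflow tool's expert-analysis call so the
# unit test never reaches an external API. "_call_expert_analysis" is assumed.
from unittest.mock import AsyncMock, patch

async def run_codereview_without_external_api(tool, arguments):
    mocked = AsyncMock(return_value={"status": "analysis_complete"})
    with patch.object(tool, "_call_expert_analysis", new=mocked):
        return await tool.execute(arguments)
```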

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

---------

Co-authored-by: Claude <noreply@anthropic.com>

Author: Beehive Innovations
Committed by: GitHub
Date: 2025-06-21 00:08:11 +04:00
Commit: 69a3121452 (parent 4dae6e457e)
76 changed files with 17111 additions and 7725 deletions

@@ -21,7 +21,7 @@ class TestPlannerTool:
         assert "SEQUENTIAL PLANNER" in tool.get_description()
         assert tool.get_default_temperature() == 0.5  # TEMPERATURE_BALANCED
         assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
-        assert tool.get_default_thinking_mode() == "high"
+        assert tool.get_default_thinking_mode() == "medium"
 
     def test_request_validation(self):
         """Test Pydantic request model validation."""
@@ -57,10 +57,10 @@ class TestPlannerTool:
         assert "branch_id" in schema["properties"]
         assert "continuation_id" in schema["properties"]
 
-        # Check excluded fields are NOT present
-        assert "model" not in schema["properties"]
-        assert "images" not in schema["properties"]
-        assert "files" not in schema["properties"]
+        # Check that workflow-based planner includes model field and excludes some fields
+        assert "model" in schema["properties"]  # Workflow tools include model field
+        assert "images" not in schema["properties"]  # Excluded for planning
+        assert "files" not in schema["properties"]  # Excluded for planning
         assert "temperature" not in schema["properties"]
         assert "thinking_mode" not in schema["properties"]
         assert "use_websearch" not in schema["properties"]
@@ -90,8 +90,10 @@ class TestPlannerTool:
             "next_step_required": True,
         }
 
-        # Mock conversation memory functions
-        with patch("utils.conversation_memory.create_thread", return_value="test-uuid-123"):
+        # Mock conversation memory functions and UUID generation
+        with patch("utils.conversation_memory.uuid.uuid4") as mock_uuid:
+            mock_uuid.return_value.hex = "test-uuid-123"
+            mock_uuid.return_value.__str__ = lambda x: "test-uuid-123"
             with patch("utils.conversation_memory.add_turn"):
                 result = await tool.execute(arguments)
@@ -193,9 +195,10 @@ class TestPlannerTool:
         parsed_response = json.loads(response_text)
 
-        # Check for previous plan context in the structured response
-        assert "previous_plan_context" in parsed_response
-        assert "Authentication system" in parsed_response["previous_plan_context"]
+        # Check that the continuation works (workflow architecture handles context differently)
+        assert parsed_response["step_number"] == 1
+        assert parsed_response["continuation_id"] == "test-continuation-id"
+        assert parsed_response["next_step_required"] is True
 
     @pytest.mark.asyncio
     async def test_execute_final_step(self):
@@ -223,7 +226,7 @@ class TestPlannerTool:
         parsed_response = json.loads(response_text)
 
         # Check final step structure
-        assert parsed_response["status"] == "planning_success"
+        assert parsed_response["status"] == "planner_complete"
         assert parsed_response["step_number"] == 10
         assert parsed_response["planning_complete"] is True
         assert "plan_summary" in parsed_response
@@ -293,8 +296,8 @@ class TestPlannerTool:
         assert parsed_response["metadata"]["revises_step_number"] == 2
 
         # Check that step data was stored in history
-        assert len(tool.step_history) > 0
-        latest_step = tool.step_history[-1]
+        assert len(tool.work_history) > 0
+        latest_step = tool.work_history[-1]
         assert latest_step["is_step_revision"] is True
         assert latest_step["revises_step_number"] == 2
@@ -326,7 +329,7 @@ class TestPlannerTool:
         # Total steps should be adjusted to match current step
         assert parsed_response["total_steps"] == 8
         assert parsed_response["step_number"] == 8
-        assert parsed_response["status"] == "planning_success"
+        assert parsed_response["status"] == "pause_for_planner"
 
     @pytest.mark.asyncio
     async def test_execute_error_handling(self):
@@ -349,7 +352,7 @@ class TestPlannerTool:
         parsed_response = json.loads(response_text)
 
-        assert parsed_response["status"] == "planning_failed"
+        assert parsed_response["status"] == "planner_failed"
         assert "error" in parsed_response
 
     @pytest.mark.asyncio
@@ -375,9 +378,9 @@ class TestPlannerTool:
            await tool.execute(step2_args)
 
         # Should have tracked both steps
-        assert len(tool.step_history) == 2
-        assert tool.step_history[0]["step"] == "First step"
-        assert tool.step_history[1]["step"] == "Second step"
+        assert len(tool.work_history) == 2
+        assert tool.work_history[0]["step"] == "First step"
+        assert tool.work_history[1]["step"] == "Second step"
 
 
 # Integration test
@@ -401,8 +404,10 @@ class TestPlannerToolIntegration:
             "next_step_required": True,
         }
 
-        # Mock conversation memory functions
-        with patch("utils.conversation_memory.create_thread", return_value="test-flow-uuid"):
+        # Mock conversation memory functions and UUID generation
+        with patch("utils.conversation_memory.uuid.uuid4") as mock_uuid:
+            mock_uuid.return_value.hex = "test-flow-uuid"
+            mock_uuid.return_value.__str__ = lambda x: "test-flow-uuid"
             with patch("utils.conversation_memory.add_turn"):
                 result = await self.tool.execute(arguments)
@@ -432,8 +437,10 @@ class TestPlannerToolIntegration:
             "next_step_required": True,
         }
 
-        # Mock conversation memory functions
-        with patch("utils.conversation_memory.create_thread", return_value="test-simple-uuid"):
+        # Mock conversation memory functions and UUID generation
+        with patch("utils.conversation_memory.uuid.uuid4") as mock_uuid:
+            mock_uuid.return_value.hex = "test-simple-uuid"
+            mock_uuid.return_value.__str__ = lambda x: "test-simple-uuid"
             with patch("utils.conversation_memory.add_turn"):
                 result = await self.tool.execute(arguments)
@@ -450,6 +457,6 @@ class TestPlannerToolIntegration:
         assert parsed_response["total_steps"] == 3
         assert parsed_response["continuation_id"] == "test-simple-uuid"
         # For simple plans (< 5 steps), expect normal flow without deep thinking pause
-        assert parsed_response["status"] == "planning_success"
+        assert parsed_response["status"] == "pause_for_planner"
         assert "thinking_required" not in parsed_response
         assert "Continue with step 2" in parsed_response["next_steps"]