🚀 Major Enhancement: Workflow-Based Tool Architecture v5.5.0 (#95)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool working as a workflow instead of a basic tool
Added support for passing False to use_assistant_model to skip external models completely and use Claude only

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed get_completion_next_steps_message missing param

* Fixed tests
Request for files consistently

* Fixed tests
Request for files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Beehive Innovations
2025-06-21 00:08:11 +04:00
committed by GitHub
parent 4dae6e457e
commit 69a3121452
76 changed files with 17111 additions and 7725 deletions

View File

@@ -6,7 +6,9 @@ Each test is in its own file for better organization and maintainability.
"""
from .base_test import BaseSimulatorTest
from .test_analyze_validation import AnalyzeValidationTest
from .test_basic_conversation import BasicConversationTest
from .test_codereview_validation import CodeReviewValidationTest
from .test_consensus_conversation import TestConsensusConversation
from .test_consensus_stance import TestConsensusStance
from .test_consensus_three_models import TestConsensusThreeModels
@@ -27,10 +29,12 @@ from .test_openrouter_models import OpenRouterModelsTest
from .test_per_tool_deduplication import PerToolDeduplicationTest
from .test_planner_continuation_history import PlannerContinuationHistoryTest
from .test_planner_validation import PlannerValidationTest
from .test_precommitworkflow_validation import PrecommitWorkflowValidationTest
# Redis validation test removed - no longer needed for standalone server
from .test_refactor_validation import RefactorValidationTest
from .test_testgen_validation import TestGenValidationTest
from .test_thinkdeep_validation import ThinkDeepWorkflowValidationTest
from .test_token_allocation_validation import TokenAllocationValidationTest
from .test_vision_capability import VisionCapabilityTest
from .test_xai_models import XAIModelsTest
@@ -38,6 +42,7 @@ from .test_xai_models import XAIModelsTest
# Test registry for dynamic loading
TEST_REGISTRY = {
"basic_conversation": BasicConversationTest,
"codereview_validation": CodeReviewValidationTest,
"content_validation": ContentValidationTest,
"per_tool_deduplication": PerToolDeduplicationTest,
"cross_tool_continuation": CrossToolContinuationTest,
@@ -52,8 +57,10 @@ TEST_REGISTRY = {
"openrouter_models": OpenRouterModelsTest,
"planner_validation": PlannerValidationTest,
"planner_continuation_history": PlannerContinuationHistoryTest,
"precommit_validation": PrecommitWorkflowValidationTest,
"token_allocation_validation": TokenAllocationValidationTest,
"testgen_validation": TestGenValidationTest,
"thinkdeep_validation": ThinkDeepWorkflowValidationTest,
"refactor_validation": RefactorValidationTest,
"debug_validation": DebugValidationTest,
"debug_certain_confidence": DebugCertainConfidenceTest,
@@ -63,19 +70,20 @@ TEST_REGISTRY = {
"consensus_conversation": TestConsensusConversation,
"consensus_stance": TestConsensusStance,
"consensus_three_models": TestConsensusThreeModels,
"analyze_validation": AnalyzeValidationTest,
# "o3_pro_expensive": O3ProExpensiveTest, # COMMENTED OUT - too expensive to run by default
}
__all__ = [
"BaseSimulatorTest",
"BasicConversationTest",
"CodeReviewValidationTest",
"ContentValidationTest",
"PerToolDeduplicationTest",
"CrossToolContinuationTest",
"CrossToolComprehensiveTest",
"LineNumberValidationTest",
"LogsValidationTest",
# "RedisValidationTest", # Removed - no longer needed for standalone server
"TestModelThinkingConfig",
"O3ModelSelectionTest",
"O3ProExpensiveTest",
@@ -84,8 +92,10 @@ __all__ = [
"OpenRouterModelsTest",
"PlannerValidationTest",
"PlannerContinuationHistoryTest",
"PrecommitWorkflowValidationTest",
"TokenAllocationValidationTest",
"TestGenValidationTest",
"ThinkDeepWorkflowValidationTest",
"RefactorValidationTest",
"DebugValidationTest",
"DebugCertainConfidenceTest",
@@ -95,5 +105,6 @@ __all__ = [
"TestConsensusConversation",
"TestConsensusStance",
"TestConsensusThreeModels",
"AnalyzeValidationTest",
"TEST_REGISTRY",
]