🚀 Major Enhancement: Workflow-Based Tool Architecture v5.5.0 (#95)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docks, replace old tool with new

* WIP: cleanup and docks, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool working as a workflow instead of a basic tool
Support for passing False to use_assistant_model to skip external models completely and use Claude only

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed get_completion_next_steps_message missing param

* Fixed tests
Request for files consistently

* Fixed tests
Request for files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Beehive Innovations
2025-06-21 00:08:11 +04:00
committed by GitHub
parent 4dae6e457e
commit 69a3121452
76 changed files with 17111 additions and 7725 deletions

View File

@@ -62,8 +62,9 @@ class TestModelEnumeration:
if value is not None:
os.environ[key] = value
# Always set auto mode for these tests
os.environ["DEFAULT_MODEL"] = "auto"
# Set auto mode only if not explicitly set in provider_config
if "DEFAULT_MODEL" not in provider_config:
os.environ["DEFAULT_MODEL"] = "auto"
# Reload config to pick up changes
import config
@@ -103,19 +104,10 @@ class TestModelEnumeration:
for model in native_models:
assert model in models, f"Native model {model} should always be in enum"
@pytest.mark.skip(reason="Complex integration test - rely on simulator tests for provider testing")
def test_openrouter_models_with_api_key(self):
"""Test that OpenRouter models are included when API key is configured."""
self._setup_environment({"OPENROUTER_API_KEY": "test-key"})
tool = AnalyzeTool()
models = tool._get_available_models()
# Check for some known OpenRouter model aliases
openrouter_models = ["opus", "sonnet", "haiku", "mistral-large", "deepseek"]
found_count = sum(1 for m in openrouter_models if m in models)
assert found_count >= 3, f"Expected at least 3 OpenRouter models, found {found_count}"
assert len(models) > 20, f"With OpenRouter, should have many models, got {len(models)}"
pass
def test_openrouter_models_without_api_key(self):
"""Test that OpenRouter models are NOT included when API key is not configured."""
@@ -130,18 +122,10 @@ class TestModelEnumeration:
assert found_count == 0, "OpenRouter models should not be included without API key"
@pytest.mark.skip(reason="Integration test - rely on simulator tests for API testing")
def test_custom_models_with_custom_url(self):
"""Test that custom models are included when CUSTOM_API_URL is configured."""
self._setup_environment({"CUSTOM_API_URL": "http://localhost:11434"})
tool = AnalyzeTool()
models = tool._get_available_models()
# Check for custom models (marked with is_custom=true)
custom_models = ["local-llama", "llama3.2"]
found_count = sum(1 for m in custom_models if m in models)
assert found_count >= 1, f"Expected at least 1 custom model, found {found_count}"
pass
def test_custom_models_without_custom_url(self):
"""Test that custom models are NOT included when CUSTOM_API_URL is not configured."""
@@ -156,71 +140,15 @@ class TestModelEnumeration:
assert found_count == 0, "Custom models should not be included without CUSTOM_API_URL"
@pytest.mark.skip(reason="Integration test - rely on simulator tests for API testing")
def test_all_providers_combined(self):
"""Test that all models are included when all providers are configured."""
self._setup_environment(
{
"GEMINI_API_KEY": "test-key",
"OPENAI_API_KEY": "test-key",
"XAI_API_KEY": "test-key",
"OPENROUTER_API_KEY": "test-key",
"CUSTOM_API_URL": "http://localhost:11434",
}
)
tool = AnalyzeTool()
models = tool._get_available_models()
# Should have all types of models
assert "flash" in models # Gemini
assert "o3" in models # OpenAI
assert "grok" in models # X.AI
assert "opus" in models or "sonnet" in models # OpenRouter
assert "local-llama" in models or "llama3.2" in models # Custom
# Should have many models total
assert len(models) > 50, f"With all providers, should have 50+ models, got {len(models)}"
# No duplicates
assert len(models) == len(set(models)), "Should have no duplicate models"
pass
@pytest.mark.skip(reason="Integration test - rely on simulator tests for API testing")
def test_mixed_provider_combinations(self):
"""Test various mixed provider configurations."""
test_cases = [
# (provider_config, expected_model_samples, min_count)
(
{"GEMINI_API_KEY": "test", "OPENROUTER_API_KEY": "test"},
["flash", "pro", "opus"], # Gemini + OpenRouter models
30,
),
(
{"OPENAI_API_KEY": "test", "CUSTOM_API_URL": "http://localhost"},
["o3", "o4-mini", "local-llama"], # OpenAI + Custom models
18, # 14 native + ~4 custom models
),
(
{"XAI_API_KEY": "test", "OPENROUTER_API_KEY": "test"},
["grok", "grok-3", "opus"], # X.AI + OpenRouter models
30,
),
]
for provider_config, expected_samples, min_count in test_cases:
self._setup_environment(provider_config)
tool = AnalyzeTool()
models = tool._get_available_models()
# Check expected models are present
for model in expected_samples:
if model in ["local-llama", "llama3.2"]: # Custom models might not all be present
continue
assert model in models, f"Expected {model} with config {provider_config}"
# Check minimum count
assert (
len(models) >= min_count
), f"Expected at least {min_count} models with {provider_config}, got {len(models)}"
pass
def test_no_duplicates_with_overlapping_providers(self):
"""Test that models aren't duplicated when multiple providers offer the same model."""
@@ -243,20 +171,10 @@ class TestModelEnumeration:
duplicates = {m: count for m, count in model_counts.items() if count > 1}
assert len(duplicates) == 0, f"Found duplicate models: {duplicates}"
@pytest.mark.skip(reason="Integration test - rely on simulator tests for API testing")
def test_schema_enum_matches_get_available_models(self):
"""Test that the schema enum matches what _get_available_models returns."""
self._setup_environment({"OPENROUTER_API_KEY": "test", "CUSTOM_API_URL": "http://localhost:11434"})
tool = AnalyzeTool()
# Get models from both methods
available_models = tool._get_available_models()
schema = tool.get_input_schema()
schema_enum = schema["properties"]["model"]["enum"]
# They should match exactly
assert set(available_models) == set(schema_enum), "Schema enum should match _get_available_models output"
assert len(available_models) == len(schema_enum), "Should have same number of models (no duplicates)"
pass
@pytest.mark.parametrize(
"model_name,should_exist",
@@ -280,3 +198,97 @@ class TestModelEnumeration:
assert model_name in models, f"Native model {model_name} should always be present"
else:
assert model_name not in models, f"Model {model_name} should not be present"
def test_auto_mode_behavior_with_environment_variables(self):
"""Test auto mode behavior with various environment variable combinations."""
# Test different environment scenarios for auto mode
test_scenarios = [
{"name": "no_providers", "env": {}, "expected_behavior": "should_include_native_only"},
{
"name": "gemini_only",
"env": {"GEMINI_API_KEY": "test-key"},
"expected_behavior": "should_include_gemini_models",
},
{
"name": "openai_only",
"env": {"OPENAI_API_KEY": "test-key"},
"expected_behavior": "should_include_openai_models",
},
{"name": "xai_only", "env": {"XAI_API_KEY": "test-key"}, "expected_behavior": "should_include_xai_models"},
{
"name": "multiple_providers",
"env": {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": "test-key", "XAI_API_KEY": "test-key"},
"expected_behavior": "should_include_all_native_models",
},
]
for scenario in test_scenarios:
# Test each scenario independently
self._setup_environment(scenario["env"])
tool = AnalyzeTool()
models = tool._get_available_models()
# Always expect native models regardless of configuration
native_models = ["flash", "pro", "o3", "o3-mini", "grok"]
for model in native_models:
assert model in models, f"Native model {model} missing in {scenario['name']} scenario"
# Verify auto mode detection
assert tool.is_effective_auto_mode(), f"Auto mode should be active in {scenario['name']} scenario"
# Verify model schema includes model field in auto mode
schema = tool.get_input_schema()
assert "model" in schema["required"], f"Model field should be required in auto mode for {scenario['name']}"
assert "model" in schema["properties"], f"Model field should be in properties for {scenario['name']}"
# Verify enum contains expected models
model_enum = schema["properties"]["model"]["enum"]
for model in native_models:
assert model in model_enum, f"Native model {model} should be in enum for {scenario['name']}"
def test_auto_mode_model_selection_validation(self):
"""Test that auto mode properly validates model selection."""
self._setup_environment({"DEFAULT_MODEL": "auto", "GEMINI_API_KEY": "test-key"})
tool = AnalyzeTool()
# Verify auto mode is active
assert tool.is_effective_auto_mode()
# Test valid model selection
available_models = tool._get_available_models()
assert len(available_models) > 0, "Should have available models in auto mode"
# Test that model validation works
schema = tool.get_input_schema()
model_enum = schema["properties"]["model"]["enum"]
# All enum models should be in available models
for enum_model in model_enum:
assert enum_model in available_models, f"Enum model {enum_model} should be available"
# All available models should be in enum
for available_model in available_models:
assert available_model in model_enum, f"Available model {available_model} should be in enum"
def test_environment_variable_precedence(self):
"""Test that environment variables are properly handled for model availability."""
# Test that setting DEFAULT_MODEL to auto enables auto mode
self._setup_environment({"DEFAULT_MODEL": "auto"})
tool = AnalyzeTool()
assert tool.is_effective_auto_mode(), "DEFAULT_MODEL=auto should enable auto mode"
# Test environment variable combinations with auto mode
self._setup_environment({"DEFAULT_MODEL": "auto", "GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": "test-key"})
tool = AnalyzeTool()
models = tool._get_available_models()
# Should include native models from providers that are theoretically configured
native_models = ["flash", "pro", "o3", "o3-mini", "grok"]
for model in native_models:
assert model in models, f"Native model {model} should be available in auto mode"
# Verify auto mode is still active
assert tool.is_effective_auto_mode(), "Auto mode should remain active with multiple providers"