Improved tracer workflow tool
Updated the Gemini 2.5 Pro model name. Added metadata to results. Fix for https://github.com/BeehiveInnovations/zen-mcp-server/issues/98
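For illustration (not part of this diff): the issue #98 scenario can be set up with environment variables alone. The snippet below mirrors the configuration used by the new tests; the key value is a placeholder, not a real credential.

import os

# Only OpenRouter is configured; all model restrictions are given as aliases.
os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"  # placeholder value
os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,pro,gpt4.1,flash,o4-mini,o3"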
@@ -82,7 +82,7 @@
      "description": "Claude 3 Haiku - Fast and efficient with vision"
    },
    {
      "model_name": "google/gemini-2.5-pro-preview",
      "model_name": "google/gemini-2.5-pro",
      "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
      "context_window": 1048576,
      "supports_extended_thinking": false,

@@ -207,8 +207,33 @@ class OpenRouterProvider(OpenAICompatibleProvider):

        if self._registry:
            for model_name in self._registry.list_models():
                # Check restrictions if enabled
                if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
                # =====================================================================================
                # CRITICAL ALIAS-AWARE RESTRICTION CHECKING (Fixed Issue #98)
                # =====================================================================================
                # Previously, restrictions only checked full model names (e.g., "google/gemini-2.5-pro")
                # but users specify aliases in OPENROUTER_ALLOWED_MODELS (e.g., "pro").
                # This caused "no models available" error even with valid restrictions.
                #
                # Fix: Check both model name AND all aliases against restrictions
                # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestOpenRouterAliasRestrictions
                # =====================================================================================
                if restriction_service:
                    # Get model config to check aliases as well
                    model_config = self._registry.resolve(model_name)
                    allowed = False

                    # Check if model name itself is allowed
                    if restriction_service.is_allowed(self.get_provider_type(), model_name):
                        allowed = True

                    # CRITICAL: Also check aliases - this fixes the alias restriction bug
                    if not allowed and model_config and model_config.aliases:
                        for alias in model_config.aliases:
                            if restriction_service.is_allowed(self.get_provider_type(), alias):
                                allowed = True
                                break

                    if not allowed:
                        continue

                models.append(model_name)

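For illustration (not part of this diff): stripped of the registry and restriction-service plumbing, the alias-aware check above reduces to "allow a model if either its canonical name or any of its aliases is on the allow-list". A minimal standalone sketch, using a plain set in place of the restriction service and a hypothetical helper name:

def is_model_allowed(model_name: str, aliases: list[str], allowed: set[str]) -> bool:
    """Return True if the canonical name or any alias appears in the allow-list."""
    if model_name in allowed:
        return True
    return any(alias in allowed for alias in aliases)


# In the issue #98 scenario the allow-list holds aliases, and the model still passes:
assert is_model_allowed(
    "google/gemini-2.5-pro",
    ["pro", "gemini-pro", "gemini", "pro-openrouter"],
    {"o3-mini", "pro", "flash"},
)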
@@ -179,7 +179,21 @@ class ModelProviderRegistry:
                continue

            for model_name in available:
                if restriction_service and not restriction_service.is_allowed(provider_type, model_name):
                # =====================================================================================
                # CRITICAL: Prevent double restriction filtering (Fixed Issue #98)
                # =====================================================================================
                # Previously, both the provider AND registry applied restrictions, causing
                # double-filtering that resulted in "no models available" errors.
                #
                # Logic: If respect_restrictions=True, provider already filtered models,
                # so registry should NOT filter them again.
                # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestOpenRouterAliasRestrictions
                # =====================================================================================
                if (
                    restriction_service
                    and not respect_restrictions  # Only filter if provider didn't already filter
                    and not restriction_service.is_allowed(provider_type, model_name)
                ):
                    logging.debug("Model %s filtered by restrictions", model_name)
                    continue
                models[model_name] = provider_type

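For illustration (not part of this diff): the intended call pattern, as exercised by the new tests, is to ask the registry once with respect_restrictions=True and receive a mapping of canonical model names to provider types, filtered in exactly one place. A hedged usage sketch; the actual contents depend on which providers and restrictions are configured.

from providers.registry import ModelProviderRegistry

# Restrictions are applied once, by the providers themselves, when respect_restrictions=True.
available = ModelProviderRegistry.get_available_models(respect_restrictions=True)

# Keys are canonical model names, values are provider types,
# e.g. "google/gemini-2.5-flash" mapping to the OpenRouter provider type
# when only an OpenRouter key is configured.
for model_name, provider_type in available.items():
    print(model_name, provider_type)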
tests/test_provider_routing_bugs.py (new file, 382 lines)
@@ -0,0 +1,382 @@
"""
Tests that reproduce and prevent provider routing bugs.

These tests specifically cover bugs that were found in production:
1. Fallback provider registration bypassing API key validation
2. OpenRouter alias-based restrictions not working
3. Double restriction filtering
4. Missing provider_used metadata
"""

import os
from unittest.mock import Mock

import pytest

from providers.base import ProviderType
from providers.registry import ModelProviderRegistry
from tools.base import ToolRequest
from tools.chat import ChatTool


class MockRequest(ToolRequest):
    """Mock request for testing."""

    pass


class TestProviderRoutingBugs:
    """Test cases that reproduce provider routing bugs."""

    def setup_method(self):
        """Set up clean state before each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

        # Clear provider registry
        registry = ModelProviderRegistry()
        registry._providers.clear()
        registry._initialized_providers.clear()

    def teardown_method(self):
        """Clean up after each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

    @pytest.mark.no_mock_provider
    def test_fallback_routing_bug_reproduction(self):
        """
        CRITICAL BUG TEST: Reproduce the bug where fallback logic auto-registers
        Google provider for 'flash' model without checking GEMINI_API_KEY.

        Scenario: User has only OPENROUTER_API_KEY, requests 'flash' model.
        Bug: System incorrectly uses Google provider instead of OpenRouter.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up bug scenario: only OpenRouter API key
            os.environ.pop("GEMINI_API_KEY", None)  # No Google API key
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"

            # Register only OpenRouter provider (like in server.py:configure_providers)
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Create tool to test fallback logic
            tool = ChatTool()

            # Test: Request 'flash' model - should use OpenRouter, not auto-register Google
            provider = tool.get_model_provider("flash")

            # ASSERTION: Should get OpenRouter provider, not Google
            assert provider is not None, "Should find a provider for 'flash' model"
            assert provider.get_provider_type() == ProviderType.OPENROUTER, (
                f"Expected OpenRouter provider for 'flash' model with only OPENROUTER_API_KEY set, "
                f"but got {provider.get_provider_type()}"
            )

            # Test common aliases that should all route to OpenRouter
            test_models = ["flash", "pro", "o3", "o3-mini", "o4-mini"]
            for model_name in test_models:
                provider = tool.get_model_provider(model_name)
                assert provider is not None, f"Should find provider for '{model_name}'"
                assert provider.get_provider_type() == ProviderType.OPENROUTER, (
                    f"Model '{model_name}' should route to OpenRouter when only OPENROUTER_API_KEY is set, "
                    f"but got {provider.get_provider_type()}"
                )

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    @pytest.mark.no_mock_provider
    def test_fallback_should_not_register_without_api_key(self):
        """
        Test that fallback logic correctly validates API keys before registering providers.

        This test ensures the fix in tools/base.py:2067-2081 works correctly.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up scenario: NO API keys at all
            for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
                os.environ.pop(key, None)

            # Create tool to test fallback logic
            tool = ChatTool()

            # Test: Request 'flash' model with no API keys - should fail gracefully
            with pytest.raises(ValueError, match="No provider found for model 'flash'"):
                tool.get_model_provider("flash")

            # Test: Request 'o3' model with no API keys - should fail gracefully
            with pytest.raises(ValueError, match="No provider found for model 'o3'"):
                tool.get_model_provider("o3")

            # Verify no providers were auto-registered
            registry = ModelProviderRegistry()
            assert len(registry._providers) == 0, "No providers should be registered without API keys"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    @pytest.mark.no_mock_provider
    def test_mixed_api_keys_correct_routing(self):
        """
        Test that when multiple API keys are available, provider routing works correctly.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up scenario: Multiple API keys available
            os.environ["GEMINI_API_KEY"] = "test-gemini-key"
            os.environ["OPENAI_API_KEY"] = "test-openai-key"
            os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"
            os.environ.pop("XAI_API_KEY", None)

            # Register providers in priority order (like server.py)
            from providers.gemini import GeminiModelProvider
            from providers.openai_provider import OpenAIModelProvider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
            ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            tool = ChatTool()

            # Test priority order: Native APIs should be preferred over OpenRouter
            # Google models should use Google provider
            flash_provider = tool.get_model_provider("flash")
            assert (
                flash_provider.get_provider_type() == ProviderType.GOOGLE
            ), "When both Google and OpenRouter API keys are available, 'flash' should prefer Google provider"

            # OpenAI models should use OpenAI provider
            o3_provider = tool.get_model_provider("o3")
            assert (
                o3_provider.get_provider_type() == ProviderType.OPENAI
            ), "When both OpenAI and OpenRouter API keys are available, 'o3' should prefer OpenAI provider"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value


class TestOpenRouterAliasRestrictions:
    """Test OpenRouter model restrictions with aliases - reproduces restriction bug."""

    def setup_method(self):
        """Set up clean state before each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

        # Clear provider registry
        registry = ModelProviderRegistry()
        registry._providers.clear()
        registry._initialized_providers.clear()

    def teardown_method(self):
        """Clean up after each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

    @pytest.mark.no_mock_provider
    def test_openrouter_alias_restrictions_bug_reproduction(self):
        """
        CRITICAL BUG TEST: Reproduce the bug where OpenRouter restrictions with aliases
        resulted in "no models available" error.

        Bug scenario: OPENROUTER_ALLOWED_MODELS=o3-mini,pro,flash,o4-mini,o3
        Expected: 5 models available (aliases resolve to full names)
        Bug: 0 models available due to alias resolution failure
        """
        # Save original environment
        original_env = {}
        for key in [
            "GEMINI_API_KEY",
            "OPENAI_API_KEY",
            "XAI_API_KEY",
            "OPENROUTER_API_KEY",
            "OPENROUTER_ALLOWED_MODELS",
        ]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up bug scenario: Only OpenRouter with alias-based restrictions
            os.environ.pop("GEMINI_API_KEY", None)
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-key"
            os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,pro,gpt4.1,flash,o4-mini,o3"  # User's exact config

            # Register OpenRouter provider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Test: Get available models with restrictions
            available_models = ModelProviderRegistry.get_available_models(respect_restrictions=True)

            # ASSERTION: Should have models available, not 0
            assert len(available_models) > 0, (
                f"Expected models available with alias restrictions 'o3-mini,pro,gpt4.1,flash,o4-mini,o3', "
                f"but got {len(available_models)} models. Available: {list(available_models.keys())}"
            )

            # Expected aliases that should resolve to models:
            # o3-mini -> openai/o3-mini
            # pro -> google/gemini-2.5-pro
            # flash -> google/gemini-2.5-flash
            # o4-mini -> openai/o4-mini
            # o3 -> openai/o3
            # gpt4.1 -> should not exist (expected to be filtered out)

            expected_models = {
                "openai/o3-mini",
                "google/gemini-2.5-pro",
                "google/gemini-2.5-flash",
                "openai/o4-mini",
                "openai/o3",
            }

            available_model_names = set(available_models.keys())

            # Should have at least the resolvable aliases (5 out of 6)
            assert len(available_model_names) >= 5, (
                f"Expected at least 5 models from alias restrictions, got {len(available_model_names)}: "
                f"{available_model_names}"
            )

            # Check that expected models are present
            missing_models = expected_models - available_model_names
            assert len(missing_models) == 0, (
                f"Missing expected models from alias restrictions: {missing_models}. "
                f"Available: {available_model_names}"
            )

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    @pytest.mark.no_mock_provider
    def test_openrouter_mixed_alias_and_full_names(self):
        """Test OpenRouter restrictions with mix of aliases and full model names."""
        # Save original environment
        original_env = {}
        for key in [
            "GEMINI_API_KEY",
            "OPENAI_API_KEY",
            "XAI_API_KEY",
            "OPENROUTER_API_KEY",
            "OPENROUTER_ALLOWED_MODELS",
        ]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up mixed restrictions: some aliases, some full names
            os.environ.pop("GEMINI_API_KEY", None)
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-key"
            os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,anthropic/claude-3-opus,flash"

            # Register OpenRouter provider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Test: Get available models
            available_models = ModelProviderRegistry.get_available_models(respect_restrictions=True)

            expected_models = {
                "openai/o3-mini",  # from alias
                "anthropic/claude-3-opus",  # full name
                "google/gemini-2.5-flash",  # from alias
            }

            available_model_names = set(available_models.keys())

            assert (
                available_model_names == expected_models
            ), f"Expected models {expected_models}, got {available_model_names}"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value


class TestProviderMetadataBug:
    """Test for missing provider_used metadata bug."""

    def test_provider_used_metadata_included(self):
        """
        Test that provider_used metadata is included in tool responses.

        Bug: Only model_used was included, provider_used was missing.
        Fix: Added provider_used field in tools/base.py
        """
        # Test the actual _parse_response method with model_info
        tool = ChatTool()

        # Create mock provider
        mock_provider = Mock()
        mock_provider.get_provider_type.return_value = ProviderType.OPENROUTER

        # Create model_info like the execute method does
        model_info = {"provider": mock_provider, "model_name": "test-model", "model_response": Mock()}

        # Test _parse_response directly with a simple response
        request = MockRequest()
        result = tool._parse_response("Test response", request, model_info)

        # Verify metadata includes both model_used and provider_used
        assert hasattr(result, "metadata"), "ToolOutput should have metadata"
        assert result.metadata is not None, "Metadata should not be None"
        assert "model_used" in result.metadata, "Metadata should include model_used"
        assert result.metadata["model_used"] == "test-model", "model_used should be correct"
        assert "provider_used" in result.metadata, "Metadata should include provider_used (bug fix)"
        assert result.metadata["provider_used"] == "openrouter", "provider_used should be correct"

tests/test_workflow_metadata.py (new file, 300 lines)
@@ -0,0 +1,300 @@
"""
Tests for workflow tool metadata functionality.

This test ensures that workflow tools include metadata (provider_used and model_used)
in their responses, similar to regular tools, for consistent tracking across all tool types.
"""

import json
import os

import pytest

from providers.base import ProviderType
from providers.registry import ModelProviderRegistry
from tools.debug import DebugIssueTool


class TestWorkflowMetadata:
    """Test cases for workflow tool metadata functionality."""

    def setup_method(self):
        """Set up clean state before each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

        # Clear provider registry
        registry = ModelProviderRegistry()
        registry._providers.clear()
        registry._initialized_providers.clear()

    def teardown_method(self):
        """Clean up after each test."""
        # Clear restriction service cache
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

    @pytest.mark.no_mock_provider
    def test_workflow_metadata_in_response(self):
        """
        Test that workflow tools include metadata in their responses.

        This test verifies that workflow tools (like debug) include provider_used
        and model_used metadata in their responses, ensuring consistency with
        regular tools for tracking purposes.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up test environment with OpenRouter API key
            os.environ.pop("GEMINI_API_KEY", None)
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"

            # Register OpenRouter provider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Create debug tool
            debug_tool = DebugIssueTool()

            # Create mock model context like server.py does
            from utils.model_context import ModelContext

            model_name = "flash"
            model_context = ModelContext(model_name)

            # Create arguments with model context (like server.py provides)
            arguments = {
                "step": "Investigating the test issue to check metadata functionality",
                "step_number": 1,
                "total_steps": 2,
                "next_step_required": False,  # Final step to trigger completion
                "findings": "Initial findings for test",
                "model": model_name,
                "confidence": "high",
                "_model_context": model_context,
                "_resolved_model_name": model_name,
            }

            # Execute the workflow tool
            import asyncio

            result = asyncio.run(debug_tool.execute_workflow(arguments))

            # Parse the JSON response
            assert len(result) == 1
            response_text = result[0].text
            response_data = json.loads(response_text)

            # Verify metadata is present
            assert "metadata" in response_data, "Workflow response should include metadata"
            metadata = response_data["metadata"]

            # Verify required metadata fields
            assert "tool_name" in metadata, "Metadata should include tool_name"
            assert "model_used" in metadata, "Metadata should include model_used"
            assert "provider_used" in metadata, "Metadata should include provider_used"

            # Verify metadata values
            assert metadata["tool_name"] == "debug", "tool_name should be 'debug'"
            assert metadata["model_used"] == model_name, f"model_used should be '{model_name}'"
            assert metadata["provider_used"] == "openrouter", "provider_used should be 'openrouter'"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    @pytest.mark.no_mock_provider
    def test_workflow_metadata_in_error_response(self):
        """
        Test that workflow tools include metadata even in error responses.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up test environment with OpenRouter API key
            os.environ.pop("GEMINI_API_KEY", None)
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"

            # Register OpenRouter provider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Create debug tool
            debug_tool = DebugIssueTool()

            # Create arguments with invalid data to trigger error
            model_name = "flash"
            arguments = {
                "step": "Test step",
                "step_number": "invalid",  # This should cause an error during validation
                "_resolved_model_name": model_name,
            }

            # Execute the workflow tool - should fail gracefully
            import asyncio

            result = asyncio.run(debug_tool.execute(arguments))

            # Parse the JSON response
            assert len(result) == 1
            response_text = result[0].text
            response_data = json.loads(response_text)

            # Verify it's an error response with metadata
            assert "status" in response_data
            assert "error" in response_data or "content" in response_data
            assert "metadata" in response_data, "Error responses should include metadata"

            metadata = response_data["metadata"]
            assert "tool_name" in metadata, "Error metadata should include tool_name"
            assert metadata["tool_name"] == "debug", "tool_name should be 'debug'"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    @pytest.mark.no_mock_provider
    def test_workflow_metadata_fallback_handling(self):
        """
        Test that workflow tools handle metadata gracefully when model context is missing.
        """
        # Create debug tool
        debug_tool = DebugIssueTool()

        # Create arguments without model context (fallback scenario)
        arguments = {
            "step": "Test step without model context",
            "step_number": 1,
            "total_steps": 1,
            "next_step_required": False,
            "findings": "Test findings",
            "model": "flash",
            "confidence": "low",
            # No _model_context or _resolved_model_name
        }

        # Execute the workflow tool
        import asyncio

        result = asyncio.run(debug_tool.execute_workflow(arguments))

        # Parse the JSON response
        assert len(result) == 1
        response_text = result[0].text
        response_data = json.loads(response_text)

        # Verify metadata is still present with fallback values
        assert "metadata" in response_data, "Workflow response should include metadata even in fallback"
        metadata = response_data["metadata"]

        # Verify fallback metadata
        assert "tool_name" in metadata, "Fallback metadata should include tool_name"
        assert "model_used" in metadata, "Fallback metadata should include model_used"
        assert "provider_used" in metadata, "Fallback metadata should include provider_used"

        assert metadata["tool_name"] == "debug", "tool_name should be 'debug'"
        assert metadata["model_used"] == "flash", "model_used should be from request"
        assert metadata["provider_used"] == "unknown", "provider_used should be 'unknown' in fallback"

    @pytest.mark.no_mock_provider
    def test_workflow_metadata_preserves_existing_response_fields(self):
        """
        Test that adding metadata doesn't interfere with existing workflow response fields.
        """
        # Save original environment
        original_env = {}
        for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]:
            original_env[key] = os.environ.get(key)

        try:
            # Set up test environment
            os.environ.pop("GEMINI_API_KEY", None)
            os.environ.pop("OPENAI_API_KEY", None)
            os.environ.pop("XAI_API_KEY", None)
            os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key"

            # Register OpenRouter provider
            from providers.openrouter import OpenRouterProvider

            ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

            # Create debug tool
            debug_tool = DebugIssueTool()

            # Create mock model context
            from utils.model_context import ModelContext

            model_name = "flash"
            model_context = ModelContext(model_name)

            # Create arguments for intermediate step
            arguments = {
                "step": "Testing intermediate step for metadata preservation",
                "step_number": 1,
                "total_steps": 3,
                "next_step_required": True,  # Intermediate step
                "findings": "Intermediate findings",
                "model": model_name,
                "confidence": "medium",
                "_model_context": model_context,
                "_resolved_model_name": model_name,
            }

            # Execute the workflow tool
            import asyncio

            result = asyncio.run(debug_tool.execute_workflow(arguments))

            # Parse the JSON response
            assert len(result) == 1
            response_text = result[0].text
            response_data = json.loads(response_text)

            # Verify standard workflow fields are preserved
            assert "status" in response_data, "Standard workflow status should be preserved"
            assert "step_number" in response_data, "Standard workflow step_number should be preserved"
            assert "total_steps" in response_data, "Standard workflow total_steps should be preserved"
            assert "next_step_required" in response_data, "Standard workflow next_step_required should be preserved"

            # Verify metadata was added without breaking existing fields
            assert "metadata" in response_data, "Metadata should be added"
            metadata = response_data["metadata"]
            assert metadata["tool_name"] == "debug"
            assert metadata["model_used"] == model_name
            assert metadata["provider_used"] == "openrouter"

            # Verify specific intermediate step fields
            assert response_data["next_step_required"] is True, "next_step_required should be preserved"
            assert response_data["step_number"] == 1, "step_number should be preserved"

        finally:
            # Restore original environment
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

@@ -1635,6 +1635,12 @@ When recommending searches, be specific about what information you need and why
        model_name = model_info.get("model_name")
        if model_name:
            metadata["model_used"] = model_name
        # FEATURE: Add provider_used metadata (Added for Issue #98)
        # This shows which provider (google, openai, openrouter, etc.) handled the request
        # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestProviderMetadataBug
        provider = model_info.get("provider")
        if provider:
            metadata["provider_used"] = provider.get_provider_type().value

        return ToolOutput(
            status=status_key,

@@ -1712,6 +1718,10 @@ When recommending searches, be specific about what information you need and why
        model_name = model_info.get("model_name")
        if model_name:
            metadata["model_used"] = model_name
        # FEATURE: Add provider_used metadata (Added for Issue #98)
        provider = model_info.get("provider")
        if provider:
            metadata["provider_used"] = provider.get_provider_type().value

        return ToolOutput(
            status="success",

@@ -1847,6 +1857,10 @@ When recommending searches, be specific about what information you need and why
        model_name = model_info.get("model_name")
        if model_name:
            metadata["model_used"] = model_name
        # FEATURE: Add provider_used metadata (Added for Issue #98)
        provider = model_info.get("provider")
        if provider:
            metadata["provider_used"] = provider.get_provider_type().value

        return ToolOutput(
            status="continuation_available",

@@ -1866,6 +1880,10 @@ When recommending searches, be specific about what information you need and why
        model_name = model_info.get("model_name")
        if model_name:
            metadata["model_used"] = model_name
        # FEATURE: Add provider_used metadata (Added for Issue #98)
        provider = model_info.get("provider")
        if provider:
            metadata["provider_used"] = provider.get_provider_type().value

        return ToolOutput(
            status="success",

@@ -2059,16 +2077,41 @@ When recommending searches, be specific about what information you need and why
        provider = ModelProviderRegistry.get_provider_for_model(model_name)

        if not provider:
            # Try to determine provider from model name patterns
            # =====================================================================================
            # CRITICAL FALLBACK LOGIC - HANDLES PROVIDER AUTO-REGISTRATION
            # =====================================================================================
            #
            # This fallback logic auto-registers providers when no provider is found for a model.
            #
            # CRITICAL BUG PREVENTION (Fixed in Issue #98):
            # - Previously, providers were registered without checking API key availability
            # - This caused Google provider to be used for "flash" model even when only
            #   OpenRouter API key was configured
            # - The fix below validates API keys BEFORE registering any provider
            #
            # TEST COVERAGE: tests/test_provider_routing_bugs.py
            # - test_fallback_routing_bug_reproduction()
            # - test_fallback_should_not_register_without_api_key()
            #
            # DO NOT REMOVE API KEY VALIDATION - This prevents incorrect provider routing
            # =====================================================================================
            import os

            if "gemini" in model_name.lower() or model_name.lower() in ["flash", "pro"]:
                # Register Gemini provider if not already registered
                # CRITICAL: Validate API key before registering Google provider
                # This prevents auto-registration when user only has OpenRouter configured
                gemini_key = os.getenv("GEMINI_API_KEY")
                if gemini_key and gemini_key.strip() and gemini_key != "your_gemini_api_key_here":
                    from providers.base import ProviderType
                    from providers.gemini import GeminiModelProvider

                    ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
                    provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE)
            elif "gpt" in model_name.lower() or "o3" in model_name.lower():
                # Register OpenAI provider if not already registered
                # CRITICAL: Validate API key before registering OpenAI provider
                # This prevents auto-registration when user only has OpenRouter configured
                openai_key = os.getenv("OPENAI_API_KEY")
                if openai_key and openai_key.strip() and openai_key != "your_openai_api_key_here":
                    from providers.base import ProviderType
                    from providers.openai_provider import OpenAIModelProvider

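For illustration (not part of this diff): the guard added above follows one pattern, never auto-register a provider unless its API key is present, non-blank, and not the .env template placeholder. A minimal sketch of that guard in isolation; has_real_key is a hypothetical helper name, not a function in the repo.

import os


def has_real_key(env_var: str, placeholder: str) -> bool:
    """True only if the variable is set, non-blank, and not the template placeholder."""
    value = os.getenv(env_var)
    return bool(value and value.strip() and value != placeholder)


# Mirrors the inline checks above before any provider is auto-registered:
if has_real_key("GEMINI_API_KEY", "your_gemini_api_key_here"):
    pass  # only then is it safe to register the Gemini provider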
@@ -657,6 +657,9 @@ class BaseWorkflowMixin(ABC):
            # Allow tools to customize the final response
            response_data = self.customize_workflow_response(response_data, request)

            # Add metadata (provider_used and model_used) to workflow response
            self._add_workflow_metadata(response_data, arguments)

            # Store in conversation memory
            if continuation_id:
                self.store_conversation_turn(continuation_id, response_data, request)

@@ -670,6 +673,10 @@ class BaseWorkflowMixin(ABC):
                "error": str(e),
                "step_number": arguments.get("step_number", 0),
            }

            # Add metadata to error responses too
            self._add_workflow_metadata(error_data, arguments)

            return [TextContent(type="text", text=json.dumps(error_data, indent=2))]

    # Hook methods for tool customization

@@ -1047,6 +1054,67 @@ class BaseWorkflowMixin(ABC):
            images=self.get_request_images(request),
        )

    def _add_workflow_metadata(self, response_data: dict, arguments: dict[str, Any]) -> None:
        """
        Add metadata (provider_used and model_used) to workflow response.

        This ensures workflow tools have the same metadata as regular tools,
        making it consistent across all tool types for tracking which provider
        and model were used for the response.

        Args:
            response_data: The response data dictionary to modify
            arguments: The original arguments containing model context
        """
        try:
            # Get model information from arguments (set by server.py)
            resolved_model_name = arguments.get("_resolved_model_name")
            model_context = arguments.get("_model_context")

            if resolved_model_name and model_context:
                # Extract provider information from model context
                provider = model_context.provider
                provider_name = provider.get_provider_type().value if provider else "unknown"

                # Create metadata dictionary
                metadata = {
                    "tool_name": self.get_name(),
                    "model_used": resolved_model_name,
                    "provider_used": provider_name,
                }

                # Add metadata to response
                response_data["metadata"] = metadata

                logger.debug(
                    f"[WORKFLOW_METADATA] {self.get_name()}: Added metadata - "
                    f"model: {resolved_model_name}, provider: {provider_name}"
                )
            else:
                # Fallback - try to get model info from request
                request = self.get_workflow_request_model()(**arguments)
                model_name = self.get_request_model_name(request)

                # Basic metadata without provider info
                metadata = {
                    "tool_name": self.get_name(),
                    "model_used": model_name,
                    "provider_used": "unknown",
                }

                response_data["metadata"] = metadata

                logger.debug(
                    f"[WORKFLOW_METADATA] {self.get_name()}: Added fallback metadata - "
                    f"model: {model_name}, provider: unknown"
                )

        except Exception as e:
            # Don't fail the workflow if metadata addition fails
            logger.warning(f"[WORKFLOW_METADATA] {self.get_name()}: Failed to add metadata: {e}")
            # Still add basic metadata with tool name
            response_data["metadata"] = {"tool_name": self.get_name()}

    def _extract_clean_workflow_content_for_history(self, response_data: dict) -> str:
        """
        Extract clean content from workflow response suitable for conversation history.

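For illustration (not part of this diff): with _add_workflow_metadata in place, a workflow response carries the same metadata block as regular tools. A sketch of the expected shape, with illustrative field values drawn from the new tests:

# Illustrative shape only; the workflow-specific fields are whatever the tool already returned.
response_data = {
    "step_number": 1,
    "next_step_required": False,
    "metadata": {
        "tool_name": "debug",
        "model_used": "flash",
        "provider_used": "openrouter",  # becomes "unknown" when no model context is available
    },
}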
@@ -1393,19 +1461,23 @@ class BaseWorkflowMixin(ABC):
        try:
            # Common validation
            if not arguments:
                return [
                    TextContent(type="text", text=json.dumps({"status": "error", "content": "No arguments provided"}))
                ]
                error_data = {"status": "error", "content": "No arguments provided"}
                # Add basic metadata even for validation errors
                error_data["metadata"] = {"tool_name": self.get_name()}
                return [TextContent(type="text", text=json.dumps(error_data))]

            # Delegate to execute_workflow
            return await self.execute_workflow(arguments)

        except Exception as e:
            logger.error(f"Error in {self.get_name()} tool execution: {e}", exc_info=True)
            error_data = {"status": "error", "content": f"Error in {self.get_name()}: {str(e)}"}
            # Add metadata to error responses
            self._add_workflow_metadata(error_data, arguments)
            return [
                TextContent(
                    type="text",
                    text=json.dumps({"status": "error", "content": f"Error in {self.get_name()}: {str(e)}"}),
                    text=json.dumps(error_data),
                )
            ]