From 668cb8b05245e39910e1b657599d6ff56a21e6e6 Mon Sep 17 00:00:00 2001 From: Fahad Date: Sat, 21 Jun 2025 09:15:18 +0400 Subject: [PATCH] Improved tracer workflow tool Updated 2.5 pro model name Add metadata to results Fix for https://github.com/BeehiveInnovations/zen-mcp-server/issues/98 --- conf/custom_models.json | 2 +- providers/openrouter.py | 31 ++- providers/registry.py | 16 +- tests/test_provider_routing_bugs.py | 382 ++++++++++++++++++++++++++++ tests/test_workflow_metadata.py | 300 ++++++++++++++++++++++ tools/base.py | 65 ++++- tools/workflow/workflow_mixin.py | 80 +++++- 7 files changed, 856 insertions(+), 20 deletions(-) create mode 100644 tests/test_provider_routing_bugs.py create mode 100644 tests/test_workflow_metadata.py diff --git a/conf/custom_models.json b/conf/custom_models.json index 8299a91..0667c35 100644 --- a/conf/custom_models.json +++ b/conf/custom_models.json @@ -82,7 +82,7 @@ "description": "Claude 3 Haiku - Fast and efficient with vision" }, { - "model_name": "google/gemini-2.5-pro-preview", + "model_name": "google/gemini-2.5-pro", "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"], "context_window": 1048576, "supports_extended_thinking": false, diff --git a/providers/openrouter.py b/providers/openrouter.py index a674660..e464f4a 100644 --- a/providers/openrouter.py +++ b/providers/openrouter.py @@ -207,9 +207,34 @@ class OpenRouterProvider(OpenAICompatibleProvider): if self._registry: for model_name in self._registry.list_models(): - # Check restrictions if enabled - if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name): - continue + # ===================================================================================== + # CRITICAL ALIAS-AWARE RESTRICTION CHECKING (Fixed Issue #98) + # ===================================================================================== + # Previously, restrictions only checked full model names (e.g., "google/gemini-2.5-pro") + # but users specify aliases in OPENROUTER_ALLOWED_MODELS (e.g., "pro"). + # This caused "no models available" error even with valid restrictions. 
+ # + # Fix: Check both model name AND all aliases against restrictions + # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestOpenRouterAliasRestrictions + # ===================================================================================== + if restriction_service: + # Get model config to check aliases as well + model_config = self._registry.resolve(model_name) + allowed = False + + # Check if model name itself is allowed + if restriction_service.is_allowed(self.get_provider_type(), model_name): + allowed = True + + # CRITICAL: Also check aliases - this fixes the alias restriction bug + if not allowed and model_config and model_config.aliases: + for alias in model_config.aliases: + if restriction_service.is_allowed(self.get_provider_type(), alias): + allowed = True + break + + if not allowed: + continue models.append(model_name) diff --git a/providers/registry.py b/providers/registry.py index 4050d1c..981832f 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -179,7 +179,21 @@ class ModelProviderRegistry: continue for model_name in available: - if restriction_service and not restriction_service.is_allowed(provider_type, model_name): + # ===================================================================================== + # CRITICAL: Prevent double restriction filtering (Fixed Issue #98) + # ===================================================================================== + # Previously, both the provider AND registry applied restrictions, causing + # double-filtering that resulted in "no models available" errors. + # + # Logic: If respect_restrictions=True, provider already filtered models, + # so registry should NOT filter them again. + # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestOpenRouterAliasRestrictions + # ===================================================================================== + if ( + restriction_service + and not respect_restrictions # Only filter if provider didn't already filter + and not restriction_service.is_allowed(provider_type, model_name) + ): logging.debug("Model %s filtered by restrictions", model_name) continue models[model_name] = provider_type diff --git a/tests/test_provider_routing_bugs.py b/tests/test_provider_routing_bugs.py new file mode 100644 index 0000000..42ab12a --- /dev/null +++ b/tests/test_provider_routing_bugs.py @@ -0,0 +1,382 @@ +""" +Tests that reproduce and prevent provider routing bugs. + +These tests specifically cover bugs that were found in production: +1. Fallback provider registration bypassing API key validation +2. OpenRouter alias-based restrictions not working +3. Double restriction filtering +4. 
Missing provider_used metadata +""" + +import os +from unittest.mock import Mock + +import pytest + +from providers.base import ProviderType +from providers.registry import ModelProviderRegistry +from tools.base import ToolRequest +from tools.chat import ChatTool + + +class MockRequest(ToolRequest): + """Mock request for testing.""" + + pass + + +class TestProviderRoutingBugs: + """Test cases that reproduce provider routing bugs.""" + + def setup_method(self): + """Set up clean state before each test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + # Clear provider registry + registry = ModelProviderRegistry() + registry._providers.clear() + registry._initialized_providers.clear() + + def teardown_method(self): + """Clean up after each test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + @pytest.mark.no_mock_provider + def test_fallback_routing_bug_reproduction(self): + """ + CRITICAL BUG TEST: Reproduce the bug where fallback logic auto-registers + Google provider for 'flash' model without checking GEMINI_API_KEY. + + Scenario: User has only OPENROUTER_API_KEY, requests 'flash' model. + Bug: System incorrectly uses Google provider instead of OpenRouter. + """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up bug scenario: only OpenRouter API key + os.environ.pop("GEMINI_API_KEY", None) # No Google API key + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" + + # Register only OpenRouter provider (like in server.py:configure_providers) + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Create tool to test fallback logic + tool = ChatTool() + + # Test: Request 'flash' model - should use OpenRouter, not auto-register Google + provider = tool.get_model_provider("flash") + + # ASSERTION: Should get OpenRouter provider, not Google + assert provider is not None, "Should find a provider for 'flash' model" + assert provider.get_provider_type() == ProviderType.OPENROUTER, ( + f"Expected OpenRouter provider for 'flash' model with only OPENROUTER_API_KEY set, " + f"but got {provider.get_provider_type()}" + ) + + # Test common aliases that should all route to OpenRouter + test_models = ["flash", "pro", "o3", "o3-mini", "o4-mini"] + for model_name in test_models: + provider = tool.get_model_provider(model_name) + assert provider is not None, f"Should find provider for '{model_name}'" + assert provider.get_provider_type() == ProviderType.OPENROUTER, ( + f"Model '{model_name}' should route to OpenRouter when only OPENROUTER_API_KEY is set, " + f"but got {provider.get_provider_type()}" + ) + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + @pytest.mark.no_mock_provider + def test_fallback_should_not_register_without_api_key(self): + """ + Test that fallback logic correctly validates API keys before registering providers. + + This test ensures the fix in tools/base.py:2067-2081 works correctly. 
+ """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up scenario: NO API keys at all + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + os.environ.pop(key, None) + + # Create tool to test fallback logic + tool = ChatTool() + + # Test: Request 'flash' model with no API keys - should fail gracefully + with pytest.raises(ValueError, match="No provider found for model 'flash'"): + tool.get_model_provider("flash") + + # Test: Request 'o3' model with no API keys - should fail gracefully + with pytest.raises(ValueError, match="No provider found for model 'o3'"): + tool.get_model_provider("o3") + + # Verify no providers were auto-registered + registry = ModelProviderRegistry() + assert len(registry._providers) == 0, "No providers should be registered without API keys" + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + @pytest.mark.no_mock_provider + def test_mixed_api_keys_correct_routing(self): + """ + Test that when multiple API keys are available, provider routing works correctly. + """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up scenario: Multiple API keys available + os.environ["GEMINI_API_KEY"] = "test-gemini-key" + os.environ["OPENAI_API_KEY"] = "test-openai-key" + os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" + os.environ.pop("XAI_API_KEY", None) + + # Register providers in priority order (like server.py) + from providers.gemini import GeminiModelProvider + from providers.openai_provider import OpenAIModelProvider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) + ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + tool = ChatTool() + + # Test priority order: Native APIs should be preferred over OpenRouter + # Google models should use Google provider + flash_provider = tool.get_model_provider("flash") + assert ( + flash_provider.get_provider_type() == ProviderType.GOOGLE + ), "When both Google and OpenRouter API keys are available, 'flash' should prefer Google provider" + + # OpenAI models should use OpenAI provider + o3_provider = tool.get_model_provider("o3") + assert ( + o3_provider.get_provider_type() == ProviderType.OPENAI + ), "When both OpenAI and OpenRouter API keys are available, 'o3' should prefer OpenAI provider" + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +class TestOpenRouterAliasRestrictions: + """Test OpenRouter model restrictions with aliases - reproduces restriction bug.""" + + def setup_method(self): + """Set up clean state before each test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + # Clear provider registry + registry = ModelProviderRegistry() + registry._providers.clear() + registry._initialized_providers.clear() + + def teardown_method(self): + """Clean up after each 
test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + @pytest.mark.no_mock_provider + def test_openrouter_alias_restrictions_bug_reproduction(self): + """ + CRITICAL BUG TEST: Reproduce the bug where OpenRouter restrictions with aliases + resulted in "no models available" error. + + Bug scenario: OPENROUTER_ALLOWED_MODELS=o3-mini,pro,flash,o4-mini,o3 + Expected: 5 models available (aliases resolve to full names) + Bug: 0 models available due to alias resolution failure + """ + # Save original environment + original_env = {} + for key in [ + "GEMINI_API_KEY", + "OPENAI_API_KEY", + "XAI_API_KEY", + "OPENROUTER_API_KEY", + "OPENROUTER_ALLOWED_MODELS", + ]: + original_env[key] = os.environ.get(key) + + try: + # Set up bug scenario: Only OpenRouter with alias-based restrictions + os.environ.pop("GEMINI_API_KEY", None) + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-key" + os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,pro,gpt4.1,flash,o4-mini,o3" # User's exact config + + # Register OpenRouter provider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Test: Get available models with restrictions + available_models = ModelProviderRegistry.get_available_models(respect_restrictions=True) + + # ASSERTION: Should have models available, not 0 + assert len(available_models) > 0, ( + f"Expected models available with alias restrictions 'o3-mini,pro,gpt4.1,flash,o4-mini,o3', " + f"but got {len(available_models)} models. Available: {list(available_models.keys())}" + ) + + # Expected aliases that should resolve to models: + # o3-mini -> openai/o3-mini + # pro -> google/gemini-2.5-pro + # flash -> google/gemini-2.5-flash + # o4-mini -> openai/o4-mini + # o3 -> openai/o3 + # gpt4.1 -> should not exist (expected to be filtered out) + + expected_models = { + "openai/o3-mini", + "google/gemini-2.5-pro", + "google/gemini-2.5-flash", + "openai/o4-mini", + "openai/o3", + } + + available_model_names = set(available_models.keys()) + + # Should have at least the resolvable aliases (5 out of 6) + assert len(available_model_names) >= 5, ( + f"Expected at least 5 models from alias restrictions, got {len(available_model_names)}: " + f"{available_model_names}" + ) + + # Check that expected models are present + missing_models = expected_models - available_model_names + assert len(missing_models) == 0, ( + f"Missing expected models from alias restrictions: {missing_models}. 
" + f"Available: {available_model_names}" + ) + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + @pytest.mark.no_mock_provider + def test_openrouter_mixed_alias_and_full_names(self): + """Test OpenRouter restrictions with mix of aliases and full model names.""" + # Save original environment + original_env = {} + for key in [ + "GEMINI_API_KEY", + "OPENAI_API_KEY", + "XAI_API_KEY", + "OPENROUTER_API_KEY", + "OPENROUTER_ALLOWED_MODELS", + ]: + original_env[key] = os.environ.get(key) + + try: + # Set up mixed restrictions: some aliases, some full names + os.environ.pop("GEMINI_API_KEY", None) + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-key" + os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,anthropic/claude-3-opus,flash" + + # Register OpenRouter provider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Test: Get available models + available_models = ModelProviderRegistry.get_available_models(respect_restrictions=True) + + expected_models = { + "openai/o3-mini", # from alias + "anthropic/claude-3-opus", # full name + "google/gemini-2.5-flash", # from alias + } + + available_model_names = set(available_models.keys()) + + assert ( + available_model_names == expected_models + ), f"Expected models {expected_models}, got {available_model_names}" + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +class TestProviderMetadataBug: + """Test for missing provider_used metadata bug.""" + + def test_provider_used_metadata_included(self): + """ + Test that provider_used metadata is included in tool responses. + + Bug: Only model_used was included, provider_used was missing. + Fix: Added provider_used field in tools/base.py + """ + # Test the actual _parse_response method with model_info + tool = ChatTool() + + # Create mock provider + mock_provider = Mock() + mock_provider.get_provider_type.return_value = ProviderType.OPENROUTER + + # Create model_info like the execute method does + model_info = {"provider": mock_provider, "model_name": "test-model", "model_response": Mock()} + + # Test _parse_response directly with a simple response + request = MockRequest() + result = tool._parse_response("Test response", request, model_info) + + # Verify metadata includes both model_used and provider_used + assert hasattr(result, "metadata"), "ToolOutput should have metadata" + assert result.metadata is not None, "Metadata should not be None" + assert "model_used" in result.metadata, "Metadata should include model_used" + assert result.metadata["model_used"] == "test-model", "model_used should be correct" + assert "provider_used" in result.metadata, "Metadata should include provider_used (bug fix)" + assert result.metadata["provider_used"] == "openrouter", "provider_used should be correct" diff --git a/tests/test_workflow_metadata.py b/tests/test_workflow_metadata.py new file mode 100644 index 0000000..7f1e139 --- /dev/null +++ b/tests/test_workflow_metadata.py @@ -0,0 +1,300 @@ +""" +Tests for workflow tool metadata functionality. 
+ +This test ensures that workflow tools include metadata (provider_used and model_used) +in their responses, similar to regular tools, for consistent tracking across all tool types. +""" + +import json +import os + +import pytest + +from providers.base import ProviderType +from providers.registry import ModelProviderRegistry +from tools.debug import DebugIssueTool + + +class TestWorkflowMetadata: + """Test cases for workflow tool metadata functionality.""" + + def setup_method(self): + """Set up clean state before each test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + # Clear provider registry + registry = ModelProviderRegistry() + registry._providers.clear() + registry._initialized_providers.clear() + + def teardown_method(self): + """Clean up after each test.""" + # Clear restriction service cache + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + @pytest.mark.no_mock_provider + def test_workflow_metadata_in_response(self): + """ + Test that workflow tools include metadata in their responses. + + This test verifies that workflow tools (like debug) include provider_used + and model_used metadata in their responses, ensuring consistency with + regular tools for tracking purposes. + """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up test environment with OpenRouter API key + os.environ.pop("GEMINI_API_KEY", None) + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" + + # Register OpenRouter provider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Create debug tool + debug_tool = DebugIssueTool() + + # Create mock model context like server.py does + from utils.model_context import ModelContext + + model_name = "flash" + model_context = ModelContext(model_name) + + # Create arguments with model context (like server.py provides) + arguments = { + "step": "Investigating the test issue to check metadata functionality", + "step_number": 1, + "total_steps": 2, + "next_step_required": False, # Final step to trigger completion + "findings": "Initial findings for test", + "model": model_name, + "confidence": "high", + "_model_context": model_context, + "_resolved_model_name": model_name, + } + + # Execute the workflow tool + import asyncio + + result = asyncio.run(debug_tool.execute_workflow(arguments)) + + # Parse the JSON response + assert len(result) == 1 + response_text = result[0].text + response_data = json.loads(response_text) + + # Verify metadata is present + assert "metadata" in response_data, "Workflow response should include metadata" + metadata = response_data["metadata"] + + # Verify required metadata fields + assert "tool_name" in metadata, "Metadata should include tool_name" + assert "model_used" in metadata, "Metadata should include model_used" + assert "provider_used" in metadata, "Metadata should include provider_used" + + # Verify metadata values + assert metadata["tool_name"] == "debug", "tool_name should be 'debug'" + assert metadata["model_used"] == model_name, f"model_used should be '{model_name}'" + assert metadata["provider_used"] == "openrouter", "provider_used should be 'openrouter'" + + finally: + # Restore 
original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + @pytest.mark.no_mock_provider + def test_workflow_metadata_in_error_response(self): + """ + Test that workflow tools include metadata even in error responses. + """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up test environment with OpenRouter API key + os.environ.pop("GEMINI_API_KEY", None) + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" + + # Register OpenRouter provider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Create debug tool + debug_tool = DebugIssueTool() + + # Create arguments with invalid data to trigger error + model_name = "flash" + arguments = { + "step": "Test step", + "step_number": "invalid", # This should cause an error during validation + "_resolved_model_name": model_name, + } + + # Execute the workflow tool - should fail gracefully + import asyncio + + result = asyncio.run(debug_tool.execute(arguments)) + + # Parse the JSON response + assert len(result) == 1 + response_text = result[0].text + response_data = json.loads(response_text) + + # Verify it's an error response with metadata + assert "status" in response_data + assert "error" in response_data or "content" in response_data + assert "metadata" in response_data, "Error responses should include metadata" + + metadata = response_data["metadata"] + assert "tool_name" in metadata, "Error metadata should include tool_name" + assert metadata["tool_name"] == "debug", "tool_name should be 'debug'" + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + @pytest.mark.no_mock_provider + def test_workflow_metadata_fallback_handling(self): + """ + Test that workflow tools handle metadata gracefully when model context is missing. 
+ """ + # Create debug tool + debug_tool = DebugIssueTool() + + # Create arguments without model context (fallback scenario) + arguments = { + "step": "Test step without model context", + "step_number": 1, + "total_steps": 1, + "next_step_required": False, + "findings": "Test findings", + "model": "flash", + "confidence": "low", + # No _model_context or _resolved_model_name + } + + # Execute the workflow tool + import asyncio + + result = asyncio.run(debug_tool.execute_workflow(arguments)) + + # Parse the JSON response + assert len(result) == 1 + response_text = result[0].text + response_data = json.loads(response_text) + + # Verify metadata is still present with fallback values + assert "metadata" in response_data, "Workflow response should include metadata even in fallback" + metadata = response_data["metadata"] + + # Verify fallback metadata + assert "tool_name" in metadata, "Fallback metadata should include tool_name" + assert "model_used" in metadata, "Fallback metadata should include model_used" + assert "provider_used" in metadata, "Fallback metadata should include provider_used" + + assert metadata["tool_name"] == "debug", "tool_name should be 'debug'" + assert metadata["model_used"] == "flash", "model_used should be from request" + assert metadata["provider_used"] == "unknown", "provider_used should be 'unknown' in fallback" + + @pytest.mark.no_mock_provider + def test_workflow_metadata_preserves_existing_response_fields(self): + """ + Test that adding metadata doesn't interfere with existing workflow response fields. + """ + # Save original environment + original_env = {} + for key in ["GEMINI_API_KEY", "OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]: + original_env[key] = os.environ.get(key) + + try: + # Set up test environment + os.environ.pop("GEMINI_API_KEY", None) + os.environ.pop("OPENAI_API_KEY", None) + os.environ.pop("XAI_API_KEY", None) + os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" + + # Register OpenRouter provider + from providers.openrouter import OpenRouterProvider + + ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) + + # Create debug tool + debug_tool = DebugIssueTool() + + # Create mock model context + from utils.model_context import ModelContext + + model_name = "flash" + model_context = ModelContext(model_name) + + # Create arguments for intermediate step + arguments = { + "step": "Testing intermediate step for metadata preservation", + "step_number": 1, + "total_steps": 3, + "next_step_required": True, # Intermediate step + "findings": "Intermediate findings", + "model": model_name, + "confidence": "medium", + "_model_context": model_context, + "_resolved_model_name": model_name, + } + + # Execute the workflow tool + import asyncio + + result = asyncio.run(debug_tool.execute_workflow(arguments)) + + # Parse the JSON response + assert len(result) == 1 + response_text = result[0].text + response_data = json.loads(response_text) + + # Verify standard workflow fields are preserved + assert "status" in response_data, "Standard workflow status should be preserved" + assert "step_number" in response_data, "Standard workflow step_number should be preserved" + assert "total_steps" in response_data, "Standard workflow total_steps should be preserved" + assert "next_step_required" in response_data, "Standard workflow next_step_required should be preserved" + + # Verify metadata was added without breaking existing fields + assert "metadata" in response_data, "Metadata should be added" + metadata = 
response_data["metadata"] + assert metadata["tool_name"] == "debug" + assert metadata["model_used"] == model_name + assert metadata["provider_used"] == "openrouter" + + # Verify specific intermediate step fields + assert response_data["next_step_required"] is True, "next_step_required should be preserved" + assert response_data["step_number"] == 1, "step_number should be preserved" + + finally: + # Restore original environment + for key, value in original_env.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value diff --git a/tools/base.py b/tools/base.py index bebfc7e..ab75a1f 100644 --- a/tools/base.py +++ b/tools/base.py @@ -1635,6 +1635,12 @@ When recommending searches, be specific about what information you need and why model_name = model_info.get("model_name") if model_name: metadata["model_used"] = model_name + # FEATURE: Add provider_used metadata (Added for Issue #98) + # This shows which provider (google, openai, openrouter, etc.) handled the request + # TEST COVERAGE: tests/test_provider_routing_bugs.py::TestProviderMetadataBug + provider = model_info.get("provider") + if provider: + metadata["provider_used"] = provider.get_provider_type().value return ToolOutput( status=status_key, @@ -1712,6 +1718,10 @@ When recommending searches, be specific about what information you need and why model_name = model_info.get("model_name") if model_name: metadata["model_used"] = model_name + # FEATURE: Add provider_used metadata (Added for Issue #98) + provider = model_info.get("provider") + if provider: + metadata["provider_used"] = provider.get_provider_type().value return ToolOutput( status="success", @@ -1847,6 +1857,10 @@ When recommending searches, be specific about what information you need and why model_name = model_info.get("model_name") if model_name: metadata["model_used"] = model_name + # FEATURE: Add provider_used metadata (Added for Issue #98) + provider = model_info.get("provider") + if provider: + metadata["provider_used"] = provider.get_provider_type().value return ToolOutput( status="continuation_available", @@ -1866,6 +1880,10 @@ When recommending searches, be specific about what information you need and why model_name = model_info.get("model_name") if model_name: metadata["model_used"] = model_name + # FEATURE: Add provider_used metadata (Added for Issue #98) + provider = model_info.get("provider") + if provider: + metadata["provider_used"] = provider.get_provider_type().value return ToolOutput( status="success", @@ -2059,21 +2077,46 @@ When recommending searches, be specific about what information you need and why provider = ModelProviderRegistry.get_provider_for_model(model_name) if not provider: - # Try to determine provider from model name patterns + # ===================================================================================== + # CRITICAL FALLBACK LOGIC - HANDLES PROVIDER AUTO-REGISTRATION + # ===================================================================================== + # + # This fallback logic auto-registers providers when no provider is found for a model. 
+ # + # CRITICAL BUG PREVENTION (Fixed in Issue #98): + # - Previously, providers were registered without checking API key availability + # - This caused Google provider to be used for "flash" model even when only + # OpenRouter API key was configured + # - The fix below validates API keys BEFORE registering any provider + # + # TEST COVERAGE: tests/test_provider_routing_bugs.py + # - test_fallback_routing_bug_reproduction() + # - test_fallback_should_not_register_without_api_key() + # + # DO NOT REMOVE API KEY VALIDATION - This prevents incorrect provider routing + # ===================================================================================== + import os + if "gemini" in model_name.lower() or model_name.lower() in ["flash", "pro"]: - # Register Gemini provider if not already registered - from providers.base import ProviderType - from providers.gemini import GeminiModelProvider + # CRITICAL: Validate API key before registering Google provider + # This prevents auto-registration when user only has OpenRouter configured + gemini_key = os.getenv("GEMINI_API_KEY") + if gemini_key and gemini_key.strip() and gemini_key != "your_gemini_api_key_here": + from providers.base import ProviderType + from providers.gemini import GeminiModelProvider - ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) - provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE) + ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) + provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE) elif "gpt" in model_name.lower() or "o3" in model_name.lower(): - # Register OpenAI provider if not already registered - from providers.base import ProviderType - from providers.openai_provider import OpenAIModelProvider + # CRITICAL: Validate API key before registering OpenAI provider + # This prevents auto-registration when user only has OpenRouter configured + openai_key = os.getenv("OPENAI_API_KEY") + if openai_key and openai_key.strip() and openai_key != "your_openai_api_key_here": + from providers.base import ProviderType + from providers.openai_provider import OpenAIModelProvider - ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) - provider = ModelProviderRegistry.get_provider(ProviderType.OPENAI) + ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) + provider = ModelProviderRegistry.get_provider(ProviderType.OPENAI) if not provider: raise ValueError( diff --git a/tools/workflow/workflow_mixin.py b/tools/workflow/workflow_mixin.py index ac7b0ec..f3bd6c8 100644 --- a/tools/workflow/workflow_mixin.py +++ b/tools/workflow/workflow_mixin.py @@ -657,6 +657,9 @@ class BaseWorkflowMixin(ABC): # Allow tools to customize the final response response_data = self.customize_workflow_response(response_data, request) + # Add metadata (provider_used and model_used) to workflow response + self._add_workflow_metadata(response_data, arguments) + # Store in conversation memory if continuation_id: self.store_conversation_turn(continuation_id, response_data, request) @@ -670,6 +673,10 @@ class BaseWorkflowMixin(ABC): "error": str(e), "step_number": arguments.get("step_number", 0), } + + # Add metadata to error responses too + self._add_workflow_metadata(error_data, arguments) + return [TextContent(type="text", text=json.dumps(error_data, indent=2))] # Hook methods for tool customization @@ -1047,6 +1054,67 @@ class BaseWorkflowMixin(ABC): images=self.get_request_images(request), ) + def 
_add_workflow_metadata(self, response_data: dict, arguments: dict[str, Any]) -> None: + """ + Add metadata (provider_used and model_used) to workflow response. + + This ensures workflow tools have the same metadata as regular tools, + making it consistent across all tool types for tracking which provider + and model were used for the response. + + Args: + response_data: The response data dictionary to modify + arguments: The original arguments containing model context + """ + try: + # Get model information from arguments (set by server.py) + resolved_model_name = arguments.get("_resolved_model_name") + model_context = arguments.get("_model_context") + + if resolved_model_name and model_context: + # Extract provider information from model context + provider = model_context.provider + provider_name = provider.get_provider_type().value if provider else "unknown" + + # Create metadata dictionary + metadata = { + "tool_name": self.get_name(), + "model_used": resolved_model_name, + "provider_used": provider_name, + } + + # Add metadata to response + response_data["metadata"] = metadata + + logger.debug( + f"[WORKFLOW_METADATA] {self.get_name()}: Added metadata - " + f"model: {resolved_model_name}, provider: {provider_name}" + ) + else: + # Fallback - try to get model info from request + request = self.get_workflow_request_model()(**arguments) + model_name = self.get_request_model_name(request) + + # Basic metadata without provider info + metadata = { + "tool_name": self.get_name(), + "model_used": model_name, + "provider_used": "unknown", + } + + response_data["metadata"] = metadata + + logger.debug( + f"[WORKFLOW_METADATA] {self.get_name()}: Added fallback metadata - " + f"model: {model_name}, provider: unknown" + ) + + except Exception as e: + # Don't fail the workflow if metadata addition fails + logger.warning(f"[WORKFLOW_METADATA] {self.get_name()}: Failed to add metadata: {e}") + # Still add basic metadata with tool name + response_data["metadata"] = {"tool_name": self.get_name()} + def _extract_clean_workflow_content_for_history(self, response_data: dict) -> str: """ Extract clean content from workflow response suitable for conversation history. @@ -1393,19 +1461,23 @@ class BaseWorkflowMixin(ABC): try: # Common validation if not arguments: - return [ - TextContent(type="text", text=json.dumps({"status": "error", "content": "No arguments provided"})) - ] + error_data = {"status": "error", "content": "No arguments provided"} + # Add basic metadata even for validation errors + error_data["metadata"] = {"tool_name": self.get_name()} + return [TextContent(type="text", text=json.dumps(error_data))] # Delegate to execute_workflow return await self.execute_workflow(arguments) except Exception as e: logger.error(f"Error in {self.get_name()} tool execution: {e}", exc_info=True) + error_data = {"status": "error", "content": f"Error in {self.get_name()}: {str(e)}"} + # Add metadata to error responses + self._add_workflow_metadata(error_data, arguments) return [ TextContent( type="text", - text=json.dumps({"status": "error", "content": f"Error in {self.get_name()}: {str(e)}"}), + text=json.dumps(error_data), ) ]
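-- 
A minimal sketch (not part of the applied diff) of the metadata shape these changes introduce. The metadata keys — tool_name, model_used, provider_used — come from the assertions in tests/test_provider_routing_bugs.py and tests/test_workflow_metadata.py; the surrounding response fields and all concrete values below are illustrative assumptions, shown only to make clear how a client could read the new provider_used field.

import json

# Example response payload mirroring what the new tests assert: a tool/workflow
# response whose "metadata" block now records the tool, model, and provider.
example_response_text = json.dumps(
    {
        "status": "success",                 # illustrative status value
        "step_number": 1,                    # workflow fields preserved alongside metadata
        "next_step_required": False,
        "metadata": {
            "tool_name": "debug",            # which tool produced the response
            "model_used": "flash",           # resolved model name
            "provider_used": "openrouter",   # provider type value added by this patch
        },
    },
    indent=2,
)

# A client can now attribute the response to a provider as well as a model.
response_data = json.loads(example_response_text)
metadata = response_data.get("metadata", {})
print(metadata.get("provider_used", "unknown"))  # -> "openrouter"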