From ec3a466b1c1434d4ea3475702c6c5033ae1a43e5 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Thu, 19 Jun 2025 06:14:04 +0400
Subject: [PATCH] Fixed planner tool warnings when model was auto (model not required)

---
 config.py                             |   8 +-
 server.py                             |   6 +
 tests/test_auto_mode_comprehensive.py |   4 +-
 tests/test_auto_model_planner_fix.py  | 210 ++++++++++++++++++++++++++
 tests/test_model_restrictions.py      |   4 +-
 tests/test_providers.py               |   4 +-
 tools/base.py                         |  12 ++
 tools/planner.py                      |  10 ++
 8 files changed, 244 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_auto_model_planner_fix.py

diff --git a/config.py b/config.py
index bf39757..38f310c 100644
--- a/config.py
+++ b/config.py
@@ -14,9 +14,9 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.1.2"
+__version__ = "5.1.3"
 # Last update date in ISO format
-__updated__ = "2025-06-18"
+__updated__ = "2025-06-19"
 # Primary maintainer
 __author__ = "Fahad Gilani"
 
@@ -61,9 +61,7 @@ MODEL_CAPABILITIES_DESC = {
     "grokfast": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
     # Full model names also supported (for explicit specification)
     "gemini-2.5-flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
-    "gemini-2.5-pro": (
-        "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
-    ),
+    "gemini-2.5-pro": ("Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"),
 }
 
 # OpenRouter/Custom API Fallback Behavior:
diff --git a/server.py b/server.py
index 2987a49..0a1b27e 100644
--- a/server.py
+++ b/server.py
@@ -555,6 +555,12 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
         # Consensus tool handles its own model configuration validation
         # No special handling needed at server level
 
+        # Skip model resolution for tools that don't require models (e.g., planner)
+        if not tool.requires_model():
+            logger.debug(f"Tool {name} doesn't require model resolution - skipping model validation")
+            # Execute tool directly without model context
+            return await tool.execute(arguments)
+
         # Handle auto mode at MCP boundary - resolve to specific model
         if model_name.lower() == "auto":
             # Get tool category to determine appropriate model
diff --git a/tests/test_auto_mode_comprehensive.py b/tests/test_auto_mode_comprehensive.py
index a6f281f..27daaff 100644
--- a/tests/test_auto_mode_comprehensive.py
+++ b/tests/test_auto_mode_comprehensive.py
@@ -567,9 +567,7 @@ class TestAutoModeComprehensive:
         mock_response.model_name = "gemini-2.5-flash"  # The resolved name
         mock_response.usage = {"input_tokens": 10, "output_tokens": 5}
         # Mock _resolve_model_name to simulate alias resolution
-        mock_provider._resolve_model_name = lambda alias: (
-            "gemini-2.5-flash" if alias == "flash" else alias
-        )
+        mock_provider._resolve_model_name = lambda alias: ("gemini-2.5-flash" if alias == "flash" else alias)
         mock_provider.generate_content.return_value = mock_response
 
         with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
diff --git a/tests/test_auto_model_planner_fix.py b/tests/test_auto_model_planner_fix.py
new file mode 100644
index 0000000..0ca9b76
--- /dev/null
+++ b/tests/test_auto_model_planner_fix.py
@@ -0,0 +1,210 @@
+"""
+Unit tests for the auto model planner fix.
+
+This test confirms that the planner tool no longer fails when DEFAULT_MODEL is "auto"
+and only basic providers (Google/OpenAI) are configured, while ensuring other tools
+still properly require model resolution.
+"""
+
+from unittest.mock import patch
+
+from mcp.types import TextContent
+
+from tools.base import BaseTool
+from tools.chat import ChatTool
+from tools.planner import PlannerTool
+
+
+class TestAutoModelPlannerFix:
+    """Test the fix for auto model resolution with planner tool."""
+
+    def test_planner_requires_model_false(self):
+        """Test that planner tool returns False for requires_model."""
+        planner = PlannerTool()
+        assert planner.requires_model() is False
+
+    def test_chat_requires_model_true(self):
+        """Test that chat tool returns True for requires_model (default behavior)."""
+        chat = ChatTool()
+        assert chat.requires_model() is True
+
+    def test_base_tool_requires_model_default(self):
+        """Test that BaseTool default implementation returns True."""
+
+        # Create a mock tool that doesn't override requires_model
+        class MockTool(BaseTool):
+            def get_name(self):
+                return "mock"
+
+            def get_description(self):
+                return "Mock tool"
+
+            def get_input_schema(self):
+                return {}
+
+            def get_system_prompt(self):
+                return "Mock prompt"
+
+            def get_request_model(self):
+                from tools.base import ToolRequest
+
+                return ToolRequest
+
+            async def prepare_prompt(self, request):
+                return "Mock prompt"
+
+        mock_tool = MockTool()
+        assert mock_tool.requires_model() is True
+
+    @patch("config.DEFAULT_MODEL", "auto")
+    @patch("providers.registry.ModelProviderRegistry.get_provider_for_model")
+    def test_auto_model_error_before_fix_simulation(self, mock_get_provider):
+        """
+        Simulate the error that would occur before the fix.
+
+        This test simulates what would happen if server.py didn't check requires_model()
+        and tried to resolve "auto" as a literal model name.
+        """
+        # Mock the scenario where no provider is found for "auto"
+        mock_get_provider.return_value = None
+
+        # This should return None, simulating the "No provider found for model auto" error
+        result = mock_get_provider("auto")
+        assert result is None
+
+        # Verify that the mock was called with "auto"
+        mock_get_provider.assert_called_with("auto")
+
+    @patch("server.DEFAULT_MODEL", "auto")
+    async def test_planner_execution_bypasses_model_resolution(self):
+        """
+        Test that planner tool execution works even when DEFAULT_MODEL is "auto".
+
+        This test confirms that the fix allows planner to work regardless of
+        model configuration since it doesn't need model resolution.
+        """
+        planner = PlannerTool()
+
+        # Test with minimal planner arguments
+        arguments = {"step": "Test planning step", "step_number": 1, "total_steps": 1, "next_step_required": False}
+
+        # This should work without any model resolution
+        result = await planner.execute(arguments)
+
+        # Verify we got a result
+        assert isinstance(result, list)
+        assert len(result) > 0
+        assert isinstance(result[0], TextContent)
+
+        # Parse the JSON response to verify it's valid
+        import json
+
+        response_data = json.loads(result[0].text)
+        assert response_data["status"] == "planning_success"
+        assert response_data["step_number"] == 1
+
+    @patch("config.DEFAULT_MODEL", "auto")
+    def test_server_model_resolution_logic(self):
+        """
+        Test the server-side logic that checks requires_model() before model resolution.
+
+        This simulates the key fix in server.py where we check tool.requires_model()
+        before attempting model resolution.
+        """
+        planner = PlannerTool()
+        chat = ChatTool()
+
+        # Simulate the server logic
+        def simulate_server_model_resolution(tool, model_name):
+            """Simulate the fixed server logic from server.py"""
+            if not tool.requires_model():
+                # Skip model resolution for tools that don't require models
+                return "SKIP_MODEL_RESOLUTION"
+            else:
+                # Would normally do model resolution here
+                return f"RESOLVE_MODEL_{model_name}"
+
+        # Test planner (should skip model resolution)
+        result = simulate_server_model_resolution(planner, "auto")
+        assert result == "SKIP_MODEL_RESOLUTION"
+
+        # Test chat (should attempt model resolution)
+        result = simulate_server_model_resolution(chat, "auto")
+        assert result == "RESOLVE_MODEL_auto"
+
+    def test_provider_registry_auto_handling(self):
+        """
+        Test that the provider registry correctly handles model resolution.
+
+        This tests the scenario where providers don't recognize "auto" as a model.
+        """
+        from providers.registry import ModelProviderRegistry
+
+        # This should return None since "auto" is not a real model name
+        provider = ModelProviderRegistry.get_provider_for_model("auto")
+        assert provider is None, "Provider registry should not find a provider for literal 'auto'"
+
+    @patch("config.DEFAULT_MODEL", "auto")
+    async def test_end_to_end_planner_with_auto_mode(self):
+        """
+        End-to-end test of planner tool execution in auto mode.
+
+        This test verifies that the complete flow works when DEFAULT_MODEL is "auto"
+        and the planner tool is used.
+        """
+        planner = PlannerTool()
+
+        # Verify the tool doesn't require model resolution
+        assert not planner.requires_model()
+
+        # Test a multi-step planning scenario
+        step1_args = {
+            "step": "Analyze the current system architecture",
+            "step_number": 1,
+            "total_steps": 3,
+            "next_step_required": True,
+        }
+
+        result1 = await planner.execute(step1_args)
+        assert len(result1) > 0
+
+        # Parse and verify the response
+        import json
+
+        response1 = json.loads(result1[0].text)
+        assert response1["status"] == "planning_success"
+        assert response1["next_step_required"] is True
+        assert "continuation_id" in response1
+
+        # Test step 2 with continuation
+        continuation_id = response1["continuation_id"]
+        step2_args = {
+            "step": "Design the microservices architecture",
+            "step_number": 2,
+            "total_steps": 3,
+            "next_step_required": True,
+            "continuation_id": continuation_id,
+        }
+
+        result2 = await planner.execute(step2_args)
+        assert len(result2) > 0
+
+        response2 = json.loads(result2[0].text)
+        assert response2["status"] == "planning_success"
+        assert response2["step_number"] == 2
+
+    def test_other_tools_still_require_models(self):
+        """
+        Verify that other tools still properly require model resolution.
+
+        This ensures our fix doesn't break existing functionality.
+        """
+        from tools.analyze import AnalyzeTool
+        from tools.chat import ChatTool
+        from tools.debug import DebugIssueTool
+
+        # Test various tools still require models
+        tools_requiring_models = [ChatTool(), DebugIssueTool(), AnalyzeTool()]
+
+        for tool in tools_requiring_models:
+            assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"
diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py
index 0df646a..b4b0e66 100644
--- a/tests/test_model_restrictions.py
+++ b/tests/test_model_restrictions.py
@@ -513,9 +513,7 @@ class TestRegistryIntegration:
             ProviderType.GOOGLE: type(mock_gemini),
         }
 
-        with patch.dict(
-            os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}
-        ):
+        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}):
             # Clear cached restriction service
             import utils.model_restrictions
 
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 2b2c866..5401bc9 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -148,9 +148,7 @@ class TestGeminiProvider:
 
         provider = GeminiModelProvider(api_key="test-key")
 
-        response = provider.generate_content(
-            prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7
-        )
+        response = provider.generate_content(prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7)
 
         assert isinstance(response, ModelResponse)
         assert response.content == "Generated content"
diff --git a/tools/base.py b/tools/base.py
index 310c1fc..a9da378 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -214,6 +214,18 @@ class BaseTool(ABC):
         """
         pass
 
+    def requires_model(self) -> bool:
+        """
+        Return whether this tool requires AI model access.
+
+        Tools that override execute() to do pure data processing (like planner)
+        should return False to skip model resolution at the MCP boundary.
+
+        Returns:
+            bool: True if tool needs AI model access (default), False for data-only tools
+        """
+        return True
+
     @classmethod
     def _get_openrouter_registry(cls):
         """Get cached OpenRouter registry instance."""
diff --git a/tools/planner.py b/tools/planner.py
index 63b2d31..8e74e2d 100644
--- a/tools/planner.py
+++ b/tools/planner.py
@@ -244,6 +244,16 @@ class PlannerTool(BaseTool):
     def get_default_thinking_mode(self) -> str:
         return "high"  # Default to high thinking for comprehensive planning
 
+    def requires_model(self) -> bool:
+        """
+        Planner tool doesn't require AI model access - it's pure data processing.
+
+        This prevents the server from trying to resolve model names like "auto"
+        when the planner tool is used, since it overrides execute() and doesn't
+        make any AI API calls.
+        """
+        return False
+
     async def execute(self, arguments: dict[str, Any]) -> list:
         """
         Override execute to work like original TypeScript tool - no AI calls, just data processing.