Fixed planner tool warnings when model was auto (model not required)

Fahad
2025-06-19 06:14:04 +04:00
parent be1885c782
commit ec3a466b1c
8 changed files with 244 additions and 14 deletions

View File

@@ -14,9 +14,9 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.1.2"
__version__ = "5.1.3"
# Last update date in ISO format
__updated__ = "2025-06-18"
__updated__ = "2025-06-19"
# Primary maintainer
__author__ = "Fahad Gilani"
@@ -61,9 +61,7 @@ MODEL_CAPABILITIES_DESC = {
"grokfast": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
# Full model names also supported (for explicit specification)
"gemini-2.5-flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"gemini-2.5-pro": (
"Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
),
"gemini-2.5-pro": ("Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"),
}
# OpenRouter/Custom API Fallback Behavior:

View File

@@ -555,6 +555,12 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
# Consensus tool handles its own model configuration validation
# No special handling needed at server level
# Skip model resolution for tools that don't require models (e.g., planner)
if not tool.requires_model():
logger.debug(f"Tool {name} doesn't require model resolution - skipping model validation")
# Execute tool directly without model context
return await tool.execute(arguments)
# Handle auto mode at MCP boundary - resolve to specific model
if model_name.lower() == "auto":
# Get tool category to determine appropriate model

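The guard above is the core of the fix: the server asks the tool whether it needs a model at all before attempting any resolution, so the literal model name "auto" never reaches the provider registry for data-only tools. A minimal sketch of that flow, assuming a hypothetical resolve_auto_model_for_tool helper rather than the project's actual resolution code:

from typing import Any

# Hypothetical stand-in for the server's real auto-mode resolution logic.
def resolve_auto_model_for_tool(tool) -> str:
    return "gemini-2.5-flash"

async def handle_call_tool_sketch(tool, arguments: dict[str, Any]) -> list:
    """Illustrative flow only, mirroring the guard added in server.py."""
    if not tool.requires_model():
        # Data-only tools (e.g. planner) skip provider lookup entirely,
        # so "auto" is never treated as a literal model name.
        return await tool.execute(arguments)
    model_name = arguments.get("model", "auto")
    if model_name.lower() == "auto":
        model_name = resolve_auto_model_for_tool(tool)
    return await tool.execute({**arguments, "model": model_name})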
View File

@@ -567,9 +567,7 @@ class TestAutoModeComprehensive:
mock_response.model_name = "gemini-2.5-flash" # The resolved name
mock_response.usage = {"input_tokens": 10, "output_tokens": 5}
# Mock _resolve_model_name to simulate alias resolution
-mock_provider._resolve_model_name = lambda alias: (
-    "gemini-2.5-flash" if alias == "flash" else alias
-)
+mock_provider._resolve_model_name = lambda alias: ("gemini-2.5-flash" if alias == "flash" else alias)
mock_provider.generate_content.return_value = mock_response
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):

View File

@@ -0,0 +1,210 @@
"""
Unit tests for the auto model planner fix.
This test confirms that the planner tool no longer fails when DEFAULT_MODEL is "auto"
and only basic providers (Google/OpenAI) are configured, while ensuring other tools
still properly require model resolution.
"""
from unittest.mock import patch
from mcp.types import TextContent
from tools.base import BaseTool
from tools.chat import ChatTool
from tools.planner import PlannerTool
class TestAutoModelPlannerFix:
"""Test the fix for auto model resolution with planner tool."""
def test_planner_requires_model_false(self):
"""Test that planner tool returns False for requires_model."""
planner = PlannerTool()
assert planner.requires_model() is False
def test_chat_requires_model_true(self):
"""Test that chat tool returns True for requires_model (default behavior)."""
chat = ChatTool()
assert chat.requires_model() is True
def test_base_tool_requires_model_default(self):
"""Test that BaseTool default implementation returns True."""
# Create a mock tool that doesn't override requires_model
class MockTool(BaseTool):
def get_name(self):
return "mock"
def get_description(self):
return "Mock tool"
def get_input_schema(self):
return {}
def get_system_prompt(self):
return "Mock prompt"
def get_request_model(self):
from tools.base import ToolRequest
return ToolRequest
async def prepare_prompt(self, request):
return "Mock prompt"
mock_tool = MockTool()
assert mock_tool.requires_model() is True
@patch("config.DEFAULT_MODEL", "auto")
@patch("providers.registry.ModelProviderRegistry.get_provider_for_model")
def test_auto_model_error_before_fix_simulation(self, mock_get_provider):
"""
Simulate the error that would occur before the fix.
This test simulates what would happen if server.py didn't check requires_model()
and tried to resolve "auto" as a literal model name.
"""
# Mock the scenario where no provider is found for "auto"
mock_get_provider.return_value = None
# This should return None, simulating the "No provider found for model auto" error
result = mock_get_provider("auto")
assert result is None
# Verify that the mock was called with "auto"
mock_get_provider.assert_called_with("auto")
@patch("server.DEFAULT_MODEL", "auto")
async def test_planner_execution_bypasses_model_resolution(self):
"""
Test that planner tool execution works even when DEFAULT_MODEL is "auto".
This test confirms that the fix allows planner to work regardless of
model configuration since it doesn't need model resolution.
"""
planner = PlannerTool()
# Test with minimal planner arguments
arguments = {"step": "Test planning step", "step_number": 1, "total_steps": 1, "next_step_required": False}
# This should work without any model resolution
result = await planner.execute(arguments)
# Verify we got a result
assert isinstance(result, list)
assert len(result) > 0
assert isinstance(result[0], TextContent)
# Parse the JSON response to verify it's valid
import json
response_data = json.loads(result[0].text)
assert response_data["status"] == "planning_success"
assert response_data["step_number"] == 1
@patch("config.DEFAULT_MODEL", "auto")
def test_server_model_resolution_logic(self):
"""
Test the server-side logic that checks requires_model() before model resolution.
This simulates the key fix in server.py where we check tool.requires_model()
before attempting model resolution.
"""
planner = PlannerTool()
chat = ChatTool()
# Simulate the server logic
def simulate_server_model_resolution(tool, model_name):
"""Simulate the fixed server logic from server.py"""
if not tool.requires_model():
# Skip model resolution for tools that don't require models
return "SKIP_MODEL_RESOLUTION"
else:
# Would normally do model resolution here
return f"RESOLVE_MODEL_{model_name}"
# Test planner (should skip model resolution)
result = simulate_server_model_resolution(planner, "auto")
assert result == "SKIP_MODEL_RESOLUTION"
# Test chat (should attempt model resolution)
result = simulate_server_model_resolution(chat, "auto")
assert result == "RESOLVE_MODEL_auto"
def test_provider_registry_auto_handling(self):
"""
Test that the provider registry correctly handles model resolution.
This tests the scenario where providers don't recognize "auto" as a model.
"""
from providers.registry import ModelProviderRegistry
# This should return None since "auto" is not a real model name
provider = ModelProviderRegistry.get_provider_for_model("auto")
assert provider is None, "Provider registry should not find a provider for literal 'auto'"
@patch("config.DEFAULT_MODEL", "auto")
async def test_end_to_end_planner_with_auto_mode(self):
"""
End-to-end test of planner tool execution in auto mode.
This test verifies that the complete flow works when DEFAULT_MODEL is "auto"
and the planner tool is used.
"""
planner = PlannerTool()
# Verify the tool doesn't require model resolution
assert not planner.requires_model()
# Test a multi-step planning scenario
step1_args = {
"step": "Analyze the current system architecture",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
}
result1 = await planner.execute(step1_args)
assert len(result1) > 0
# Parse and verify the response
import json
response1 = json.loads(result1[0].text)
assert response1["status"] == "planning_success"
assert response1["next_step_required"] is True
assert "continuation_id" in response1
# Test step 2 with continuation
continuation_id = response1["continuation_id"]
step2_args = {
"step": "Design the microservices architecture",
"step_number": 2,
"total_steps": 3,
"next_step_required": True,
"continuation_id": continuation_id,
}
result2 = await planner.execute(step2_args)
assert len(result2) > 0
response2 = json.loads(result2[0].text)
assert response2["status"] == "planning_success"
assert response2["step_number"] == 2
def test_other_tools_still_require_models(self):
"""
Verify that other tools still properly require model resolution.
This ensures our fix doesn't break existing functionality.
"""
from tools.analyze import AnalyzeTool
from tools.chat import ChatTool
from tools.debug import DebugIssueTool
# Test various tools still require models
tools_requiring_models = [ChatTool(), DebugIssueTool(), AnalyzeTool()]
for tool in tools_requiring_models:
assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"

View File

@@ -513,9 +513,7 @@ class TestRegistryIntegration:
ProviderType.GOOGLE: type(mock_gemini),
}
-with patch.dict(
-    os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}
-):
+with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}):
# Clear cached restriction service
import utils.model_restrictions

View File

@@ -148,9 +148,7 @@ class TestGeminiProvider:
provider = GeminiModelProvider(api_key="test-key")
-response = provider.generate_content(
-    prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7
-)
+response = provider.generate_content(prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7)
assert isinstance(response, ModelResponse)
assert response.content == "Generated content"

View File

@@ -214,6 +214,18 @@ class BaseTool(ABC):
"""
pass
def requires_model(self) -> bool:
"""
Return whether this tool requires AI model access.
Tools that override execute() to do pure data processing (like planner)
should return False to skip model resolution at the MCP boundary.
Returns:
bool: True if tool needs AI model access (default), False for data-only tools
"""
return True
@classmethod
def _get_openrouter_registry(cls):
"""Get cached OpenRouter registry instance."""

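Any other tool that overrides execute() for pure data processing can opt in to the same behaviour by overriding this hook. A sketch of a hypothetical data-only tool (ChecklistTool is illustrative and not part of this commit; the abstract-method set mirrors the MockTool used in the tests above):

from typing import Any

from tools.base import BaseTool, ToolRequest

class ChecklistTool(BaseTool):
    """Hypothetical data-only tool; it never calls an AI provider."""

    def get_name(self):
        return "checklist"

    def get_description(self):
        return "Builds a checklist locally"

    def get_input_schema(self):
        return {}

    def get_system_prompt(self):
        return ""

    def get_request_model(self):
        return ToolRequest

    async def prepare_prompt(self, request):
        return ""

    def requires_model(self) -> bool:
        # Opt out of model resolution at the MCP boundary.
        return False

    async def execute(self, arguments: dict[str, Any]) -> list:
        # Pure data processing, same pattern as the planner tool.
        return [f"[ ] {item}" for item in arguments.get("items", [])]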
View File

@@ -244,6 +244,16 @@ class PlannerTool(BaseTool):
def get_default_thinking_mode(self) -> str:
return "high" # Default to high thinking for comprehensive planning
def requires_model(self) -> bool:
"""
Planner tool doesn't require AI model access - it's pure data processing.
This prevents the server from trying to resolve model names like "auto"
when the planner tool is used, since it overrides execute() and doesn't
make any AI API calls.
"""
return False
async def execute(self, arguments: dict[str, Any]) -> list:
"""
Override execute to work like original TypeScript tool - no AI calls, just data processing.