my-pal-mcp-server/tests/test_auto_model_planner_fix.py

"""
Unit tests for the auto model planner fix.

This test confirms that the planner tool no longer fails when DEFAULT_MODEL is "auto"
and only basic providers (Google/OpenAI) are configured, while ensuring other tools
still properly require model resolution.
"""

from unittest.mock import patch

from mcp.types import TextContent

from tools.chat import ChatTool
from tools.planner import PlannerTool
from tools.shared.base_tool import BaseTool


class TestAutoModelPlannerFix:
    """Test the fix for auto model resolution with planner tool."""

    def test_planner_requires_model_false(self):
        """Test that planner tool returns False for requires_model."""
        planner = PlannerTool()
        assert planner.requires_model() is False

    def test_chat_requires_model_true(self):
        """Test that chat tool returns True for requires_model (default behavior)."""
        chat = ChatTool()
        assert chat.requires_model() is True

    def test_base_tool_requires_model_default(self):
        """Test that BaseTool default implementation returns True."""

        # Create a mock tool that doesn't override requires_model
        class MockTool(BaseTool):
            def get_name(self):
                return "mock"

            def get_description(self):
                return "Mock tool"

            def get_input_schema(self):
                return {}

            def get_system_prompt(self):
                return "Mock prompt"

            def get_request_model(self):
                from tools.shared.base_models import ToolRequest

                return ToolRequest

            async def prepare_prompt(self, request):
                return "Mock prompt"

        mock_tool = MockTool()
        assert mock_tool.requires_model() is True

    @patch("config.DEFAULT_MODEL", "auto")
    @patch("providers.registry.ModelProviderRegistry.get_provider_for_model")
    def test_auto_model_error_before_fix_simulation(self, mock_get_provider):
        """
        Simulate the error that would occur before the fix.

        This test simulates what would happen if server.py didn't check requires_model()
        and tried to resolve "auto" as a literal model name.
        """
        # Mock the scenario where no provider is found for "auto"
        mock_get_provider.return_value = None

        # This should return None, simulating the "No provider found for model auto" error
        result = mock_get_provider("auto")
        assert result is None

        # Verify that the mock was called with "auto"
        mock_get_provider.assert_called_with("auto")

    @patch("server.DEFAULT_MODEL", "auto")
    async def test_planner_execution_bypasses_model_resolution(self):
        """
        Test that planner tool execution works even when DEFAULT_MODEL is "auto".

        This test confirms that the fix allows planner to work regardless of
        model configuration since it doesn't need model resolution.
        """
        planner = PlannerTool()

        # Test with minimal planner arguments
        arguments = {"step": "Test planning step", "step_number": 1, "total_steps": 1, "next_step_required": False}

        # This should work without any model resolution
        result = await planner.execute(arguments)

        # Verify we got a result
        assert isinstance(result, list)
        assert len(result) > 0
        assert isinstance(result[0], TextContent)

        # Parse the JSON response to verify it's valid
        import json

        response_data = json.loads(result[0].text)
        assert response_data["status"] == "planning_complete"
        assert response_data["step_number"] == 1

    @patch("config.DEFAULT_MODEL", "auto")
    def test_server_model_resolution_logic(self):
        """
        Test the server-side logic that checks requires_model() before model resolution.

        This simulates the key fix in server.py where we check tool.requires_model()
        before attempting model resolution.
        """
        planner = PlannerTool()
        chat = ChatTool()

        # Simulate the server logic
        def simulate_server_model_resolution(tool, model_name):
            """Simulate the fixed server logic from server.py"""
            if not tool.requires_model():
                # Skip model resolution for tools that don't require models
                return "SKIP_MODEL_RESOLUTION"
            else:
                # Would normally do model resolution here
                return f"RESOLVE_MODEL_{model_name}"

        # Test planner (should skip model resolution)
        result = simulate_server_model_resolution(planner, "auto")
        assert result == "SKIP_MODEL_RESOLUTION"

        # Test chat (should attempt model resolution)
        result = simulate_server_model_resolution(chat, "auto")
        assert result == "RESOLVE_MODEL_auto"

    def test_provider_registry_auto_handling(self):
        """
        Test that the provider registry correctly handles model resolution.

        This tests the scenario where providers don't recognize "auto" as a model.
        """
        from providers.registry import ModelProviderRegistry

        # This should return None since "auto" is not a real model name
        provider = ModelProviderRegistry.get_provider_for_model("auto")
        assert provider is None, "Provider registry should not find a provider for literal 'auto'"

    @patch("config.DEFAULT_MODEL", "auto")
    async def test_end_to_end_planner_with_auto_mode(self):
        """
        End-to-end test of planner tool execution in auto mode.

        This test verifies that the complete flow works when DEFAULT_MODEL is "auto"
        and the planner tool is used.
        """
        planner = PlannerTool()

        # Verify the tool doesn't require model resolution
        assert not planner.requires_model()

        # Test a multi-step planning scenario
        step1_args = {
            "step": "Analyze the current system architecture",
            "step_number": 1,
            "total_steps": 3,
            "next_step_required": True,
        }

        result1 = await planner.execute(step1_args)
        assert len(result1) > 0

        # Parse and verify the response
        import json

        response1 = json.loads(result1[0].text)
        assert response1["status"] == "pause_for_planning"
        assert response1["next_step_required"] is True
        assert "continuation_id" in response1

        # Test step 2 with continuation
        continuation_id = response1["continuation_id"]
        step2_args = {
            "step": "Design the microservices architecture",
            "step_number": 2,
            "total_steps": 3,
            "next_step_required": True,
            "continuation_id": continuation_id,
        }

        result2 = await planner.execute(step2_args)
        assert len(result2) > 0

        response2 = json.loads(result2[0].text)
        assert response2["status"] == "pause_for_planning"
        assert response2["step_number"] == 2

    def test_other_tools_still_require_models(self):
        """
        Verify that other tools still properly require model resolution.

        This ensures our fix doesn't break existing functionality.
        Note: Debug tool requires model resolution for expert analysis phase.
        """
        from tools.analyze import AnalyzeTool
        from tools.chat import ChatTool
        from tools.debug import DebugIssueTool

        # Test various tools still require models
        tools_requiring_models = [ChatTool(), AnalyzeTool(), DebugIssueTool()]

        for tool in tools_requiring_models:
            assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"

        # Note: Debug tool requires model resolution for expert analysis phase
        # Only planner truly manages its own model calls and doesn't need resolution