Fixed planner tool warnings when model was auto (model not required)
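
When DEFAULT_MODEL is "auto", the server previously tried to resolve "auto" as a literal model name even for the planner tool, which overrides execute() and never makes AI API calls, producing "No provider found for model auto" warnings. This change adds a requires_model() hook to BaseTool (defaulting to True), overrides it in PlannerTool to return False, and has handle_call_tool() in server.py skip model resolution and execute such tools directly. Unit tests for the new behaviour are added in tests/test_auto_model_planner_fix.py, and the version is bumped to 5.1.3.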
@@ -14,9 +14,9 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.1.2"
+__version__ = "5.1.3"
# Last update date in ISO format
-__updated__ = "2025-06-18"
+__updated__ = "2025-06-19"
# Primary maintainer
__author__ = "Fahad Gilani"

@@ -61,9 +61,7 @@ MODEL_CAPABILITIES_DESC = {
    "grokfast": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
    # Full model names also supported (for explicit specification)
    "gemini-2.5-flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
-    "gemini-2.5-pro": (
-        "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
-    ),
+    "gemini-2.5-pro": ("Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"),
}

# OpenRouter/Custom API Fallback Behavior:

@@ -555,6 +555,12 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
    # Consensus tool handles its own model configuration validation
    # No special handling needed at server level

+    # Skip model resolution for tools that don't require models (e.g., planner)
+    if not tool.requires_model():
+        logger.debug(f"Tool {name} doesn't require model resolution - skipping model validation")
+        # Execute tool directly without model context
+        return await tool.execute(arguments)
+
    # Handle auto mode at MCP boundary - resolve to specific model
    if model_name.lower() == "auto":
        # Get tool category to determine appropriate model

@@ -567,9 +567,7 @@ class TestAutoModeComprehensive:
        mock_response.model_name = "gemini-2.5-flash"  # The resolved name
        mock_response.usage = {"input_tokens": 10, "output_tokens": 5}
        # Mock _resolve_model_name to simulate alias resolution
-        mock_provider._resolve_model_name = lambda alias: (
-            "gemini-2.5-flash" if alias == "flash" else alias
-        )
+        mock_provider._resolve_model_name = lambda alias: ("gemini-2.5-flash" if alias == "flash" else alias)
        mock_provider.generate_content.return_value = mock_response

        with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):

tests/test_auto_model_planner_fix.py (new file, 210 lines)
@@ -0,0 +1,210 @@
"""
Unit tests for the auto model planner fix.

This test confirms that the planner tool no longer fails when DEFAULT_MODEL is "auto"
and only basic providers (Google/OpenAI) are configured, while ensuring other tools
still properly require model resolution.
"""

from unittest.mock import patch

from mcp.types import TextContent

from tools.base import BaseTool
from tools.chat import ChatTool
from tools.planner import PlannerTool


class TestAutoModelPlannerFix:
    """Test the fix for auto model resolution with planner tool."""

    def test_planner_requires_model_false(self):
        """Test that planner tool returns False for requires_model."""
        planner = PlannerTool()
        assert planner.requires_model() is False

    def test_chat_requires_model_true(self):
        """Test that chat tool returns True for requires_model (default behavior)."""
        chat = ChatTool()
        assert chat.requires_model() is True

    def test_base_tool_requires_model_default(self):
        """Test that BaseTool default implementation returns True."""

        # Create a mock tool that doesn't override requires_model
        class MockTool(BaseTool):
            def get_name(self):
                return "mock"

            def get_description(self):
                return "Mock tool"

            def get_input_schema(self):
                return {}

            def get_system_prompt(self):
                return "Mock prompt"

            def get_request_model(self):
                from tools.base import ToolRequest

                return ToolRequest

            async def prepare_prompt(self, request):
                return "Mock prompt"

        mock_tool = MockTool()
        assert mock_tool.requires_model() is True

    @patch("config.DEFAULT_MODEL", "auto")
    @patch("providers.registry.ModelProviderRegistry.get_provider_for_model")
    def test_auto_model_error_before_fix_simulation(self, mock_get_provider):
        """
        Simulate the error that would occur before the fix.

        This test simulates what would happen if server.py didn't check requires_model()
        and tried to resolve "auto" as a literal model name.
        """
        # Mock the scenario where no provider is found for "auto"
        mock_get_provider.return_value = None

        # This should return None, simulating the "No provider found for model auto" error
        result = mock_get_provider("auto")
        assert result is None

        # Verify that the mock was called with "auto"
        mock_get_provider.assert_called_with("auto")

    @patch("server.DEFAULT_MODEL", "auto")
    async def test_planner_execution_bypasses_model_resolution(self):
        """
        Test that planner tool execution works even when DEFAULT_MODEL is "auto".

        This test confirms that the fix allows planner to work regardless of
        model configuration since it doesn't need model resolution.
        """
        planner = PlannerTool()

        # Test with minimal planner arguments
        arguments = {"step": "Test planning step", "step_number": 1, "total_steps": 1, "next_step_required": False}

        # This should work without any model resolution
        result = await planner.execute(arguments)

        # Verify we got a result
        assert isinstance(result, list)
        assert len(result) > 0
        assert isinstance(result[0], TextContent)

        # Parse the JSON response to verify it's valid
        import json

        response_data = json.loads(result[0].text)
        assert response_data["status"] == "planning_success"
        assert response_data["step_number"] == 1

    @patch("config.DEFAULT_MODEL", "auto")
    def test_server_model_resolution_logic(self):
        """
        Test the server-side logic that checks requires_model() before model resolution.

        This simulates the key fix in server.py where we check tool.requires_model()
        before attempting model resolution.
        """
        planner = PlannerTool()
        chat = ChatTool()

        # Simulate the server logic
        def simulate_server_model_resolution(tool, model_name):
            """Simulate the fixed server logic from server.py"""
            if not tool.requires_model():
                # Skip model resolution for tools that don't require models
                return "SKIP_MODEL_RESOLUTION"
            else:
                # Would normally do model resolution here
                return f"RESOLVE_MODEL_{model_name}"

        # Test planner (should skip model resolution)
        result = simulate_server_model_resolution(planner, "auto")
        assert result == "SKIP_MODEL_RESOLUTION"

        # Test chat (should attempt model resolution)
        result = simulate_server_model_resolution(chat, "auto")
        assert result == "RESOLVE_MODEL_auto"

    def test_provider_registry_auto_handling(self):
        """
        Test that the provider registry correctly handles model resolution.

        This tests the scenario where providers don't recognize "auto" as a model.
        """
        from providers.registry import ModelProviderRegistry

        # This should return None since "auto" is not a real model name
        provider = ModelProviderRegistry.get_provider_for_model("auto")
        assert provider is None, "Provider registry should not find a provider for literal 'auto'"

    @patch("config.DEFAULT_MODEL", "auto")
    async def test_end_to_end_planner_with_auto_mode(self):
        """
        End-to-end test of planner tool execution in auto mode.

        This test verifies that the complete flow works when DEFAULT_MODEL is "auto"
        and the planner tool is used.
        """
        planner = PlannerTool()

        # Verify the tool doesn't require model resolution
        assert not planner.requires_model()

        # Test a multi-step planning scenario
        step1_args = {
            "step": "Analyze the current system architecture",
            "step_number": 1,
            "total_steps": 3,
            "next_step_required": True,
        }

        result1 = await planner.execute(step1_args)
        assert len(result1) > 0

        # Parse and verify the response
        import json

        response1 = json.loads(result1[0].text)
        assert response1["status"] == "planning_success"
        assert response1["next_step_required"] is True
        assert "continuation_id" in response1

        # Test step 2 with continuation
        continuation_id = response1["continuation_id"]
        step2_args = {
            "step": "Design the microservices architecture",
            "step_number": 2,
            "total_steps": 3,
            "next_step_required": True,
            "continuation_id": continuation_id,
        }

        result2 = await planner.execute(step2_args)
        assert len(result2) > 0

        response2 = json.loads(result2[0].text)
        assert response2["status"] == "planning_success"
        assert response2["step_number"] == 2

    def test_other_tools_still_require_models(self):
        """
        Verify that other tools still properly require model resolution.

        This ensures our fix doesn't break existing functionality.
        """
        from tools.analyze import AnalyzeTool
        from tools.chat import ChatTool
        from tools.debug import DebugIssueTool

        # Test various tools still require models
        tools_requiring_models = [ChatTool(), DebugIssueTool(), AnalyzeTool()]

        for tool in tools_requiring_models:
            assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"
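
(These tests presumably run under the repository's existing pytest setup, e.g. `pytest tests/test_auto_model_planner_fix.py -v`; the async test methods assume the project's asyncio test configuration is already in place.)
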
@@ -513,9 +513,7 @@ class TestRegistryIntegration:
            ProviderType.GOOGLE: type(mock_gemini),
        }

-        with patch.dict(
-            os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}
-        ):
+        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}):
            # Clear cached restriction service
            import utils.model_restrictions

@@ -148,9 +148,7 @@ class TestGeminiProvider:

        provider = GeminiModelProvider(api_key="test-key")

-        response = provider.generate_content(
-            prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7
-        )
+        response = provider.generate_content(prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7)

        assert isinstance(response, ModelResponse)
        assert response.content == "Generated content"

@@ -214,6 +214,18 @@ class BaseTool(ABC):
        """
        pass

+    def requires_model(self) -> bool:
+        """
+        Return whether this tool requires AI model access.
+
+        Tools that override execute() to do pure data processing (like planner)
+        should return False to skip model resolution at the MCP boundary.
+
+        Returns:
+            bool: True if tool needs AI model access (default), False for data-only tools
+        """
+        return True
+
    @classmethod
    def _get_openrouter_registry(cls):
        """Get cached OpenRouter registry instance."""

@@ -244,6 +244,16 @@ class PlannerTool(BaseTool):
    def get_default_thinking_mode(self) -> str:
        return "high"  # Default to high thinking for comprehensive planning

+    def requires_model(self) -> bool:
+        """
+        Planner tool doesn't require AI model access - it's pure data processing.
+
+        This prevents the server from trying to resolve model names like "auto"
+        when the planner tool is used, since it overrides execute() and doesn't
+        make any AI API calls.
+        """
+        return False
+
    async def execute(self, arguments: dict[str, Any]) -> list:
        """
        Override execute to work like original TypeScript tool - no AI calls, just data processing.