Fixed planner tool warnings when model was auto (model not required)

Fahad
2025-06-19 06:14:04 +04:00
parent be1885c782
commit ec3a466b1c
8 changed files with 244 additions and 14 deletions

View File

@@ -14,9 +14,9 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.1.2"
__version__ = "5.1.3"
# Last update date in ISO format
__updated__ = "2025-06-18"
__updated__ = "2025-06-19"
# Primary maintainer
__author__ = "Fahad Gilani"
@@ -61,9 +61,7 @@ MODEL_CAPABILITIES_DESC = {
"grokfast": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
# Full model names also supported (for explicit specification)
"gemini-2.5-flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"gemini-2.5-pro": (
"Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
),
"gemini-2.5-pro": ("Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"),
}
# OpenRouter/Custom API Fallback Behavior:

View File

@@ -555,6 +555,12 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
# Consensus tool handles its own model configuration validation
# No special handling needed at server level
# Skip model resolution for tools that don't require models (e.g., planner)
if not tool.requires_model():
logger.debug(f"Tool {name} doesn't require model resolution - skipping model validation")
# Execute tool directly without model context
return await tool.execute(arguments)
# Handle auto mode at MCP boundary - resolve to specific model
if model_name.lower() == "auto":
# Get tool category to determine appropriate model

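The guard above is the core of the fix: the server asks the tool whether it needs a model at all before attempting any resolution, so the literal model name "auto" never reaches the provider registry for data-only tools. A minimal sketch of that flow, assuming a hypothetical resolve_auto_model_for_tool helper rather than the project's actual resolution code:

from typing import Any

# Hypothetical stand-in for the server's real auto-mode resolution logic.
def resolve_auto_model_for_tool(tool) -> str:
    return "gemini-2.5-flash"

async def handle_call_tool_sketch(tool, arguments: dict[str, Any]) -> list:
    """Illustrative flow only, mirroring the guard added in server.py."""
    if not tool.requires_model():
        # Data-only tools (e.g. planner) skip provider lookup entirely,
        # so "auto" is never treated as a literal model name.
        return await tool.execute(arguments)
    model_name = arguments.get("model", "auto")
    if model_name.lower() == "auto":
        model_name = resolve_auto_model_for_tool(tool)
    return await tool.execute({**arguments, "model": model_name})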
View File

@@ -567,9 +567,7 @@ class TestAutoModeComprehensive:
mock_response.model_name = "gemini-2.5-flash" # The resolved name
mock_response.usage = {"input_tokens": 10, "output_tokens": 5}
# Mock _resolve_model_name to simulate alias resolution
-mock_provider._resolve_model_name = lambda alias: (
-    "gemini-2.5-flash" if alias == "flash" else alias
-)
+mock_provider._resolve_model_name = lambda alias: ("gemini-2.5-flash" if alias == "flash" else alias)
mock_provider.generate_content.return_value = mock_response
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):

View File

@@ -0,0 +1,210 @@
"""
Unit tests for the auto model planner fix.
This test confirms that the planner tool no longer fails when DEFAULT_MODEL is "auto"
and only basic providers (Google/OpenAI) are configured, while ensuring other tools
still properly require model resolution.
"""
from unittest.mock import patch
from mcp.types import TextContent
from tools.base import BaseTool
from tools.chat import ChatTool
from tools.planner import PlannerTool
class TestAutoModelPlannerFix:
"""Test the fix for auto model resolution with planner tool."""
def test_planner_requires_model_false(self):
"""Test that planner tool returns False for requires_model."""
planner = PlannerTool()
assert planner.requires_model() is False
def test_chat_requires_model_true(self):
"""Test that chat tool returns True for requires_model (default behavior)."""
chat = ChatTool()
assert chat.requires_model() is True
def test_base_tool_requires_model_default(self):
"""Test that BaseTool default implementation returns True."""
# Create a mock tool that doesn't override requires_model
class MockTool(BaseTool):
def get_name(self):
return "mock"
def get_description(self):
return "Mock tool"
def get_input_schema(self):
return {}
def get_system_prompt(self):
return "Mock prompt"
def get_request_model(self):
from tools.base import ToolRequest
return ToolRequest
async def prepare_prompt(self, request):
return "Mock prompt"
mock_tool = MockTool()
assert mock_tool.requires_model() is True
@patch("config.DEFAULT_MODEL", "auto")
@patch("providers.registry.ModelProviderRegistry.get_provider_for_model")
def test_auto_model_error_before_fix_simulation(self, mock_get_provider):
"""
Simulate the error that would occur before the fix.
This test simulates what would happen if server.py didn't check requires_model()
and tried to resolve "auto" as a literal model name.
"""
# Mock the scenario where no provider is found for "auto"
mock_get_provider.return_value = None
# This should return None, simulating the "No provider found for model auto" error
result = mock_get_provider("auto")
assert result is None
# Verify that the mock was called with "auto"
mock_get_provider.assert_called_with("auto")
@patch("server.DEFAULT_MODEL", "auto")
async def test_planner_execution_bypasses_model_resolution(self):
"""
Test that planner tool execution works even when DEFAULT_MODEL is "auto".
This test confirms that the fix allows planner to work regardless of
model configuration since it doesn't need model resolution.
"""
planner = PlannerTool()
# Test with minimal planner arguments
arguments = {"step": "Test planning step", "step_number": 1, "total_steps": 1, "next_step_required": False}
# This should work without any model resolution
result = await planner.execute(arguments)
# Verify we got a result
assert isinstance(result, list)
assert len(result) > 0
assert isinstance(result[0], TextContent)
# Parse the JSON response to verify it's valid
import json
response_data = json.loads(result[0].text)
assert response_data["status"] == "planning_success"
assert response_data["step_number"] == 1
@patch("config.DEFAULT_MODEL", "auto")
def test_server_model_resolution_logic(self):
"""
Test the server-side logic that checks requires_model() before model resolution.
This simulates the key fix in server.py where we check tool.requires_model()
before attempting model resolution.
"""
planner = PlannerTool()
chat = ChatTool()
# Simulate the server logic
def simulate_server_model_resolution(tool, model_name):
"""Simulate the fixed server logic from server.py"""
if not tool.requires_model():
# Skip model resolution for tools that don't require models
return "SKIP_MODEL_RESOLUTION"
else:
# Would normally do model resolution here
return f"RESOLVE_MODEL_{model_name}"
# Test planner (should skip model resolution)
result = simulate_server_model_resolution(planner, "auto")
assert result == "SKIP_MODEL_RESOLUTION"
# Test chat (should attempt model resolution)
result = simulate_server_model_resolution(chat, "auto")
assert result == "RESOLVE_MODEL_auto"
def test_provider_registry_auto_handling(self):
"""
Test that the provider registry correctly handles model resolution.
This tests the scenario where providers don't recognize "auto" as a model.
"""
from providers.registry import ModelProviderRegistry
# This should return None since "auto" is not a real model name
provider = ModelProviderRegistry.get_provider_for_model("auto")
assert provider is None, "Provider registry should not find a provider for literal 'auto'"
@patch("config.DEFAULT_MODEL", "auto")
async def test_end_to_end_planner_with_auto_mode(self):
"""
End-to-end test of planner tool execution in auto mode.
This test verifies that the complete flow works when DEFAULT_MODEL is "auto"
and the planner tool is used.
"""
planner = PlannerTool()
# Verify the tool doesn't require model resolution
assert not planner.requires_model()
# Test a multi-step planning scenario
step1_args = {
"step": "Analyze the current system architecture",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
}
result1 = await planner.execute(step1_args)
assert len(result1) > 0
# Parse and verify the response
import json
response1 = json.loads(result1[0].text)
assert response1["status"] == "planning_success"
assert response1["next_step_required"] is True
assert "continuation_id" in response1
# Test step 2 with continuation
continuation_id = response1["continuation_id"]
step2_args = {
"step": "Design the microservices architecture",
"step_number": 2,
"total_steps": 3,
"next_step_required": True,
"continuation_id": continuation_id,
}
result2 = await planner.execute(step2_args)
assert len(result2) > 0
response2 = json.loads(result2[0].text)
assert response2["status"] == "planning_success"
assert response2["step_number"] == 2
def test_other_tools_still_require_models(self):
"""
Verify that other tools still properly require model resolution.
This ensures our fix doesn't break existing functionality.
"""
from tools.analyze import AnalyzeTool
from tools.chat import ChatTool
from tools.debug import DebugIssueTool
# Test various tools still require models
tools_requiring_models = [ChatTool(), DebugIssueTool(), AnalyzeTool()]
for tool in tools_requiring_models:
assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"

View File

@@ -513,9 +513,7 @@ class TestRegistryIntegration:
ProviderType.GOOGLE: type(mock_gemini),
}
-with patch.dict(
-    os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}
-):
+with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini", "GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}):
# Clear cached restriction service
import utils.model_restrictions

View File

@@ -148,9 +148,7 @@ class TestGeminiProvider:
provider = GeminiModelProvider(api_key="test-key")
-response = provider.generate_content(
-    prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7
-)
+response = provider.generate_content(prompt="Test prompt", model_name="gemini-2.5-flash", temperature=0.7)
assert isinstance(response, ModelResponse)
assert response.content == "Generated content"

View File

@@ -214,6 +214,18 @@ class BaseTool(ABC):
"""
pass
def requires_model(self) -> bool:
"""
Return whether this tool requires AI model access.
Tools that override execute() to do pure data processing (like planner)
should return False to skip model resolution at the MCP boundary.
Returns:
bool: True if tool needs AI model access (default), False for data-only tools
"""
return True
@classmethod
def _get_openrouter_registry(cls):
"""Get cached OpenRouter registry instance."""

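Any other tool that overrides execute() for pure data processing can opt in to the same behaviour by overriding this hook. A sketch of a hypothetical data-only tool (ChecklistTool is illustrative and not part of this commit; the abstract-method set mirrors the MockTool used in the tests above):

from typing import Any

from tools.base import BaseTool, ToolRequest

class ChecklistTool(BaseTool):
    """Hypothetical data-only tool; it never calls an AI provider."""

    def get_name(self):
        return "checklist"

    def get_description(self):
        return "Builds a checklist locally"

    def get_input_schema(self):
        return {}

    def get_system_prompt(self):
        return ""

    def get_request_model(self):
        return ToolRequest

    async def prepare_prompt(self, request):
        return ""

    def requires_model(self) -> bool:
        # Opt out of model resolution at the MCP boundary.
        return False

    async def execute(self, arguments: dict[str, Any]) -> list:
        # Pure data processing, same pattern as the planner tool.
        return [f"[ ] {item}" for item in arguments.get("items", [])]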
View File

@@ -244,6 +244,16 @@ class PlannerTool(BaseTool):
def get_default_thinking_mode(self) -> str:
return "high" # Default to high thinking for comprehensive planning
def requires_model(self) -> bool:
"""
Planner tool doesn't require AI model access - it's pure data processing.
This prevents the server from trying to resolve model names like "auto"
when the planner tool is used, since it overrides execute() and doesn't
make any AI API calls.
"""
return False
async def execute(self, arguments: dict[str, Any]) -> list:
"""
Override execute to work like original TypeScript tool - no AI calls, just data processing.