refactor: remove MAX_OUTPUT_TOKENS configuration
Remove the hardcoded 32,768 token output limit to allow Gemini to use its
default/dynamic output token allocation. This provides more flexibility for
responses without artificial constraints.

- Remove MAX_OUTPUT_TOKENS constant from config
- Remove max_tokens parameter from ToolRequest base model
- Clean up all references in server.py and tools/base.py
- Remove test_output_tokens.py as it's no longer needed
- Update imports to remove MAX_OUTPUT_TOKENS references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
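For context, the sketch below shows roughly how a Gemini model is constructed once the cap is removed, assuming the `google-generativeai` SDK already used by the server. The `GEMINI_API_KEY` environment variable and the literal temperature value are illustrative placeholders, not part of this commit; the point is that with no `max_output_tokens` entry in `generation_config`, the SDK defers to the model's own default output budget.

```python
# Minimal sketch (not part of this diff): model construction after the change.
import os

import google.generativeai as genai

# Hypothetical environment variable name; the project may configure the key differently.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

model = genai.GenerativeModel(
    model_name="gemini-2.5-pro-preview-06-05",
    generation_config={
        "temperature": 0.2,  # e.g. TEMPERATURE_ANALYTICAL for review/debug tools
        "candidate_count": 1,
        # "max_output_tokens" intentionally omitted: Gemini allocates output tokens dynamically.
    },
)

response = model.generate_content("Summarize why fixed output caps can truncate long answers.")
print(response.candidates[0].content.parts[0].text)
```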
@@ -10,7 +10,6 @@ __author__ = "Fahad Gilani"
 # Model configuration
 DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
 MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
-MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
 
 # Temperature defaults for different tool types
 TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
 from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
                     __version__)
 from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
 
@@ -167,7 +167,6 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
         model_name=DEFAULT_MODEL,
         generation_config={
             "temperature": temperature,
-            "max_output_tokens": MAX_OUTPUT_TOKENS,
             "candidate_count": 1,
         },
     )
@@ -2,7 +2,7 @@
 Tests for configuration
 """
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS,
                     TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
                     TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
                     __updated__, __version__)
@@ -27,7 +27,6 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
-        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
@@ -1,165 +0,0 @@
-"""
-Tests for MAX_OUTPUT_TOKENS configuration
-"""
-
-from unittest.mock import Mock, patch
-
-import pytest
-
-from config import MAX_OUTPUT_TOKENS
-from tools.base import BaseTool, ToolRequest
-
-
-class TestMaxOutputTokens:
-    """Test that MAX_OUTPUT_TOKENS is properly applied"""
-
-    def test_max_output_tokens_value(self):
-        """Test the MAX_OUTPUT_TOKENS constant value"""
-        assert MAX_OUTPUT_TOKENS == 32_768
-
-    def test_tool_request_default_max_tokens(self):
-        """Test that ToolRequest has correct default max_tokens"""
-        request = ToolRequest()
-        assert request.max_tokens == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_base_tool_uses_max_output_tokens(self, mock_model):
-        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
-
-        # Create a concrete implementation of BaseTool for testing
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool
-        tool = TestTool()
-        await tool.execute({"test": "value"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_custom_max_tokens_override(self, mock_model):
-        """Test that custom max_tokens value overrides the default"""
-
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"},
-                        "max_tokens": {"type": "integer"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool with custom max_tokens
-        tool = TestTool()
-        custom_max_tokens = 16384
-        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
-
-        # Verify model was created with custom max_tokens
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == custom_max_tokens
-
-
-class TestServerMaxOutputTokens:
-    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
-        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
-        from server import handle_chat
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Call handle_chat
-        await handle_chat({"prompt": "Test question"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
@@ -9,18 +9,12 @@ import google.generativeai as genai
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
-from config import MAX_OUTPUT_TOKENS
-
-
 class ToolRequest(BaseModel):
     """Base request model for all tools"""
 
     model: Optional[str] = Field(
         None, description="Model to use (defaults to Gemini 2.5 Pro)"
     )
-    max_tokens: Optional[int] = Field(
-        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
-    )
     temperature: Optional[float] = Field(
         None, description="Temperature for response (tool-specific defaults)"
    )
@@ -80,10 +74,9 @@ class BaseTool(ABC):
         temperature = getattr(request, "temperature", None)
         if temperature is None:
             temperature = self.get_default_temperature()
-        max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)
 
         # Create and configure model
-        model = self.create_model(model_name, temperature, max_tokens)
+        model = self.create_model(model_name, temperature)
 
         # Generate response
         response = model.generate_content(prompt)
@@ -118,14 +111,13 @@ class BaseTool(ABC):
         return response
 
     def create_model(
-        self, model_name: str, temperature: float, max_tokens: int
+        self, model_name: str, temperature: float
     ) -> genai.GenerativeModel:
         """Create a configured Gemini model"""
         return genai.GenerativeModel(
             model_name=model_name,
             generation_config={
                 "temperature": temperature,
-                "max_output_tokens": max_tokens,
                 "candidate_count": 1,
             },
         )