refactor: remove MAX_OUTPUT_TOKENS configuration

Remove the hardcoded 32,768 token output limit to allow Gemini to use
its default/dynamic output token allocation. This provides more
flexibility for responses without artificial constraints.

- Remove MAX_OUTPUT_TOKENS constant from config
- Remove max_tokens parameter from ToolRequest base model
- Clean up all references in server.py and tools/base.py
- Remove test_output_tokens.py as it's no longer needed
- Update imports to remove MAX_OUTPUT_TOKENS references
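
In practice, the only difference callers see is that the Gemini generation config no longer carries an output cap. A minimal before/after sketch (model name and temperature are illustrative values, not taken from any one call site):

    import google.generativeai as genai

    # Before this commit: responses were hard-capped at 32,768 output tokens
    capped = genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={
            "temperature": 0.2,
            "max_output_tokens": 32_768,
            "candidate_count": 1,
        },
    )

    # After this commit: no "max_output_tokens" key, so Gemini falls back to
    # its default/dynamic output token allocation
    uncapped = genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={
            "temperature": 0.2,
            "candidate_count": 1,
        },
    )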

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: Fahad
Date: 2025-06-09 08:15:45 +04:00
parent 9bcc78c430
commit e7dcc681d3
5 changed files with 4 additions and 180 deletions

View File

@@ -10,7 +10,6 @@ __author__ = "Fahad Gilani"
# Model configuration
DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens for Gemini Pro
-MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
# Temperature defaults for different tool types
TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging

View File

@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
                     __version__)
from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
@@ -167,7 +167,6 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
        model_name=DEFAULT_MODEL,
        generation_config={
            "temperature": temperature,
-            "max_output_tokens": MAX_OUTPUT_TOKENS,
            "candidate_count": 1,
        },
    )

View File

@@ -2,7 +2,7 @@
Tests for configuration
"""
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS,
                     TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
                     TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
                     __updated__, __version__)
@@ -27,7 +27,6 @@ class TestConfig:
"""Test model configuration"""
assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
assert MAX_CONTEXT_TOKENS == 1_000_000
assert MAX_OUTPUT_TOKENS == 32_768
def test_temperature_defaults(self):
"""Test temperature constants"""

View File

@@ -1,165 +0,0 @@
"""
Tests for MAX_OUTPUT_TOKENS configuration
"""
from unittest.mock import Mock, patch
import pytest
from config import MAX_OUTPUT_TOKENS
from tools.base import BaseTool, ToolRequest
class TestMaxOutputTokens:
"""Test that MAX_OUTPUT_TOKENS is properly applied"""
def test_max_output_tokens_value(self):
"""Test the MAX_OUTPUT_TOKENS constant value"""
assert MAX_OUTPUT_TOKENS == 32_768
def test_tool_request_default_max_tokens(self):
"""Test that ToolRequest has correct default max_tokens"""
request = ToolRequest()
assert request.max_tokens == MAX_OUTPUT_TOKENS
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_base_tool_uses_max_output_tokens(self, mock_model):
"""Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
# Create a concrete implementation of BaseTool for testing
class TestTool(BaseTool):
def get_name(self):
return "test_tool"
def get_description(self):
return "Test tool"
def get_input_schema(self):
return {
"type": "object",
"properties": {
"test": {"type": "string"}
},
"required": ["test"]
}
def get_system_prompt(self):
return "Test prompt"
def get_request_model(self):
class TestRequest(ToolRequest):
test: str
return TestRequest
async def prepare_prompt(self, request):
return f"Test: {request.test}"
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Test response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Execute tool
tool = TestTool()
await tool.execute({"test": "value"})
# Verify model was created with MAX_OUTPUT_TOKENS
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_custom_max_tokens_override(self, mock_model):
"""Test that custom max_tokens value overrides the default"""
class TestTool(BaseTool):
def get_name(self):
return "test_tool"
def get_description(self):
return "Test tool"
def get_input_schema(self):
return {
"type": "object",
"properties": {
"test": {"type": "string"},
"max_tokens": {"type": "integer"}
},
"required": ["test"]
}
def get_system_prompt(self):
return "Test prompt"
def get_request_model(self):
class TestRequest(ToolRequest):
test: str
return TestRequest
async def prepare_prompt(self, request):
return f"Test: {request.test}"
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Test response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Execute tool with custom max_tokens
tool = TestTool()
custom_max_tokens = 16384
await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
# Verify model was created with custom max_tokens
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == custom_max_tokens
class TestServerMaxOutputTokens:
"""Test that server.py properly uses MAX_OUTPUT_TOKENS"""
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_handle_chat_uses_max_output_tokens(self, mock_model):
"""Test that handle_chat uses MAX_OUTPUT_TOKENS"""
from server import handle_chat
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Call handle_chat
await handle_chat({"prompt": "Test question"})
# Verify model was created with MAX_OUTPUT_TOKENS
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS

View File

@@ -9,18 +9,12 @@ import google.generativeai as genai
from mcp.types import TextContent
from pydantic import BaseModel, Field
-from config import MAX_OUTPUT_TOKENS


class ToolRequest(BaseModel):
    """Base request model for all tools"""

    model: Optional[str] = Field(
        None, description="Model to use (defaults to Gemini 2.5 Pro)"
    )
-    max_tokens: Optional[int] = Field(
-        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
-    )
    temperature: Optional[float] = Field(
        None, description="Temperature for response (tool-specific defaults)"
    )
@@ -80,10 +74,9 @@ class BaseTool(ABC):
        temperature = getattr(request, "temperature", None)
        if temperature is None:
            temperature = self.get_default_temperature()
-        max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)

        # Create and configure model
-        model = self.create_model(model_name, temperature, max_tokens)
+        model = self.create_model(model_name, temperature)

        # Generate response
        response = model.generate_content(prompt)
@@ -118,14 +111,13 @@ class BaseTool(ABC):
        return response

    def create_model(
-        self, model_name: str, temperature: float, max_tokens: int
+        self, model_name: str, temperature: float
    ) -> genai.GenerativeModel:
        """Create a configured Gemini model"""
        return genai.GenerativeModel(
            model_name=model_name,
            generation_config={
                "temperature": temperature,
-                "max_output_tokens": max_tokens,
                "candidate_count": 1,
            },
        )
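
Taken together, the base-tool path after this change looks roughly like the following standalone sketch (the helper mirrors the trimmed create_model above; the model name, prompt, and temperature are made up for illustration). No output-token cap is configured anywhere, so response length is governed entirely by Gemini's defaults:

    import google.generativeai as genai


    def create_model(model_name: str, temperature: float) -> genai.GenerativeModel:
        # Same shape as the post-change helper in tools/base.py: no output cap.
        return genai.GenerativeModel(
            model_name=model_name,
            generation_config={
                "temperature": temperature,
                "candidate_count": 1,
            },
        )


    model = create_model("gemini-2.5-pro-preview-06-05", 0.2)
    response = model.generate_content("Summarize what this refactor changes.")
    print(response.candidates[0].content.parts[0].text)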