refactor: remove MAX_OUTPUT_TOKENS configuration

Remove the hardcoded 32,768 token output limit to allow Gemini to use
its default/dynamic output token allocation. This provides more
flexibility for responses without artificial constraints.

- Remove MAX_OUTPUT_TOKENS constant from config
- Remove max_tokens parameter from ToolRequest base model
- Clean up all references in server.py and tools/base.py
- Remove test_output_tokens.py as it's no longer needed
- Update imports to remove MAX_OUTPUT_TOKENS references
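
In practice, the only difference callers see is that the Gemini generation config no longer carries an output cap. A minimal before/after sketch (model name and temperature are illustrative values, not taken from any one call site):

    import google.generativeai as genai

    # Before this commit: responses were hard-capped at 32,768 output tokens
    capped = genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={
            "temperature": 0.2,
            "max_output_tokens": 32_768,
            "candidate_count": 1,
        },
    )

    # After this commit: no "max_output_tokens" key, so Gemini falls back to
    # its default/dynamic output token allocation
    uncapped = genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={
            "temperature": 0.2,
            "candidate_count": 1,
        },
    )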

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: Fahad
Date: 2025-06-09 08:15:45 +04:00
parent 9bcc78c430
commit e7dcc681d3
5 changed files with 4 additions and 180 deletions

View File

@@ -10,7 +10,6 @@ __author__ = "Fahad Gilani"
# Model configuration
DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens for Gemini Pro
-MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
# Temperature defaults for different tool types
TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging

View File

@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
                     __version__)
from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
@@ -167,7 +167,6 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
        model_name=DEFAULT_MODEL,
        generation_config={
            "temperature": temperature,
-            "max_output_tokens": MAX_OUTPUT_TOKENS,
            "candidate_count": 1,
        },
    )

View File

@@ -2,7 +2,7 @@
Tests for configuration
"""
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS,
                     TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
                     TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
                     __updated__, __version__)
@@ -27,7 +27,6 @@ class TestConfig:
"""Test model configuration"""
assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
assert MAX_CONTEXT_TOKENS == 1_000_000
assert MAX_OUTPUT_TOKENS == 32_768
def test_temperature_defaults(self):
"""Test temperature constants"""

View File

@@ -1,165 +0,0 @@
"""
Tests for MAX_OUTPUT_TOKENS configuration
"""
from unittest.mock import Mock, patch
import pytest
from config import MAX_OUTPUT_TOKENS
from tools.base import BaseTool, ToolRequest
class TestMaxOutputTokens:
"""Test that MAX_OUTPUT_TOKENS is properly applied"""
def test_max_output_tokens_value(self):
"""Test the MAX_OUTPUT_TOKENS constant value"""
assert MAX_OUTPUT_TOKENS == 32_768
def test_tool_request_default_max_tokens(self):
"""Test that ToolRequest has correct default max_tokens"""
request = ToolRequest()
assert request.max_tokens == MAX_OUTPUT_TOKENS
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_base_tool_uses_max_output_tokens(self, mock_model):
"""Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
# Create a concrete implementation of BaseTool for testing
class TestTool(BaseTool):
def get_name(self):
return "test_tool"
def get_description(self):
return "Test tool"
def get_input_schema(self):
return {
"type": "object",
"properties": {
"test": {"type": "string"}
},
"required": ["test"]
}
def get_system_prompt(self):
return "Test prompt"
def get_request_model(self):
class TestRequest(ToolRequest):
test: str
return TestRequest
async def prepare_prompt(self, request):
return f"Test: {request.test}"
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Test response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Execute tool
tool = TestTool()
await tool.execute({"test": "value"})
# Verify model was created with MAX_OUTPUT_TOKENS
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_custom_max_tokens_override(self, mock_model):
"""Test that custom max_tokens value overrides the default"""
class TestTool(BaseTool):
def get_name(self):
return "test_tool"
def get_description(self):
return "Test tool"
def get_input_schema(self):
return {
"type": "object",
"properties": {
"test": {"type": "string"},
"max_tokens": {"type": "integer"}
},
"required": ["test"]
}
def get_system_prompt(self):
return "Test prompt"
def get_request_model(self):
class TestRequest(ToolRequest):
test: str
return TestRequest
async def prepare_prompt(self, request):
return f"Test: {request.test}"
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Test response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Execute tool with custom max_tokens
tool = TestTool()
custom_max_tokens = 16384
await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
# Verify model was created with custom max_tokens
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == custom_max_tokens
class TestServerMaxOutputTokens:
"""Test that server.py properly uses MAX_OUTPUT_TOKENS"""
@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_handle_chat_uses_max_output_tokens(self, mock_model):
"""Test that handle_chat uses MAX_OUTPUT_TOKENS"""
from server import handle_chat
# Mock response
mock_response = Mock()
mock_response.candidates = [Mock()]
mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
mock_instance = Mock()
mock_instance.generate_content.return_value = mock_response
mock_model.return_value = mock_instance
# Call handle_chat
await handle_chat({"prompt": "Test question"})
# Verify model was created with MAX_OUTPUT_TOKENS
mock_model.assert_called_once()
call_args = mock_model.call_args
# Check generation_config
assert "generation_config" in call_args[1]
config = call_args[1]["generation_config"]
assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS

View File

@@ -9,18 +9,12 @@ import google.generativeai as genai
from mcp.types import TextContent
from pydantic import BaseModel, Field
-from config import MAX_OUTPUT_TOKENS


class ToolRequest(BaseModel):
    """Base request model for all tools"""

    model: Optional[str] = Field(
        None, description="Model to use (defaults to Gemini 2.5 Pro)"
    )
-    max_tokens: Optional[int] = Field(
-        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
-    )
    temperature: Optional[float] = Field(
        None, description="Temperature for response (tool-specific defaults)"
    )
@@ -80,10 +74,9 @@ class BaseTool(ABC):
        temperature = getattr(request, "temperature", None)
        if temperature is None:
            temperature = self.get_default_temperature()
-        max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)

        # Create and configure model
-        model = self.create_model(model_name, temperature, max_tokens)
+        model = self.create_model(model_name, temperature)

        # Generate response
        response = model.generate_content(prompt)
@@ -118,14 +111,13 @@ class BaseTool(ABC):
        return response

    def create_model(
-        self, model_name: str, temperature: float, max_tokens: int
+        self, model_name: str, temperature: float
    ) -> genai.GenerativeModel:
        """Create a configured Gemini model"""
        return genai.GenerativeModel(
            model_name=model_name,
            generation_config={
                "temperature": temperature,
-                "max_output_tokens": max_tokens,
                "candidate_count": 1,
            },
        )
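
Taken together, the base-tool path after this change looks roughly like the following standalone sketch (the helper mirrors the trimmed create_model above; the model name, prompt, and temperature are made up for illustration). No output-token cap is configured anywhere, so response length is governed entirely by Gemini's defaults:

    import google.generativeai as genai


    def create_model(model_name: str, temperature: float) -> genai.GenerativeModel:
        # Same shape as the post-change helper in tools/base.py: no output cap.
        return genai.GenerativeModel(
            model_name=model_name,
            generation_config={
                "temperature": temperature,
                "candidate_count": 1,
            },
        )


    model = create_model("gemini-2.5-pro-preview-06-05", 0.2)
    response = model.generate_content("Summarize what this refactor changes.")
    print(response.candidates[0].content.parts[0].text)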