diff --git a/config.py b/config.py
index e6c5092..862061b 100644
--- a/config.py
+++ b/config.py
@@ -10,7 +10,6 @@ __author__ = "Fahad Gilani"
 # Model configuration
 DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
 MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
-MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
 
 # Temperature defaults for different tool types
 TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
diff --git a/server.py b/server.py
index 4fa75f2..7e15be7 100644
--- a/server.py
+++ b/server.py
@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
 from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
                     __version__)
 from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
 
@@ -167,7 +167,6 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
         model_name=DEFAULT_MODEL,
         generation_config={
             "temperature": temperature,
-            "max_output_tokens": MAX_OUTPUT_TOKENS,
             "candidate_count": 1,
         },
     )
diff --git a/tests/test_config.py b/tests/test_config.py
index 37b2a8e..5d9e59d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,7 +2,7 @@
 Tests for configuration
 """
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS,
                     TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
                     TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
                     __updated__, __version__)
@@ -27,7 +27,6 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
-        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
diff --git a/tests/test_output_tokens.py b/tests/test_output_tokens.py
deleted file mode 100644
index 484b949..0000000
--- a/tests/test_output_tokens.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""
-Tests for MAX_OUTPUT_TOKENS configuration
-"""
-
-from unittest.mock import Mock, patch
-
-import pytest
-
-from config import MAX_OUTPUT_TOKENS
-from tools.base import BaseTool, ToolRequest
-
-
-class TestMaxOutputTokens:
-    """Test that MAX_OUTPUT_TOKENS is properly applied"""
-
-    def test_max_output_tokens_value(self):
-        """Test the MAX_OUTPUT_TOKENS constant value"""
-        assert MAX_OUTPUT_TOKENS == 32_768
-
-    def test_tool_request_default_max_tokens(self):
-        """Test that ToolRequest has correct default max_tokens"""
-        request = ToolRequest()
-        assert request.max_tokens == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_base_tool_uses_max_output_tokens(self, mock_model):
-        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
-
-        # Create a concrete implementation of BaseTool for testing
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool
-        tool = TestTool()
-        await tool.execute({"test": "value"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_custom_max_tokens_override(self, mock_model):
-        """Test that custom max_tokens value overrides the default"""
-
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"},
-                        "max_tokens": {"type": "integer"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool with custom max_tokens
-        tool = TestTool()
-        custom_max_tokens = 16384
-        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
-
-        # Verify model was created with custom max_tokens
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == custom_max_tokens
-
-
-class TestServerMaxOutputTokens:
-    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
-        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
-        from server import handle_chat
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Call handle_chat
-        await handle_chat({"prompt": "Test question"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
diff --git a/tools/base.py b/tools/base.py
index 8ad5fe0..2ed7f26 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -9,18 +9,12 @@ import google.generativeai as genai
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
-from config import MAX_OUTPUT_TOKENS
-
-
 class ToolRequest(BaseModel):
     """Base request model for all tools"""
 
     model: Optional[str] = Field(
         None, description="Model to use (defaults to Gemini 2.5 Pro)"
     )
-    max_tokens: Optional[int] = Field(
-        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
-    )
     temperature: Optional[float] = Field(
         None, description="Temperature for response (tool-specific defaults)"
     )
@@ -80,10 +74,9 @@ class BaseTool(ABC):
         temperature = getattr(request, "temperature", None)
         if temperature is None:
             temperature = self.get_default_temperature()
-        max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)
 
         # Create and configure model
-        model = self.create_model(model_name, temperature, max_tokens)
+        model = self.create_model(model_name, temperature)
 
         # Generate response
         response = model.generate_content(prompt)
@@ -118,14 +111,13 @@ class BaseTool(ABC):
         return response
 
     def create_model(
-        self, model_name: str, temperature: float, max_tokens: int
+        self, model_name: str, temperature: float
     ) -> genai.GenerativeModel:
         """Create a configured Gemini model"""
         return genai.GenerativeModel(
             model_name=model_name,
             generation_config={
                 "temperature": temperature,
-                "max_output_tokens": max_tokens,
                 "candidate_count": 1,
             },
         )
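
Note: with MAX_OUTPUT_TOKENS removed everywhere, responses fall back to the
model's built-in output limit. A caller that still needs to cap response
length can do so per request rather than via a global constant. A minimal
sketch, assuming the google-generativeai API; the 4_096 cap and the prompt
string are illustrative, not part of this change:

    import google.generativeai as genai

    # Assumes genai.configure(api_key=...) has already run.
    # Model-level config after this change: no output-token cap.
    model = genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={"temperature": 0.2, "candidate_count": 1},
    )

    # generate_content accepts a per-call generation_config that overrides
    # the model-level settings, so a one-off cap needs no global constant.
    response = model.generate_content(
        "Summarize the change.",
        generation_config={"max_output_tokens": 4_096},
    )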
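If regression coverage for the removal is wanted, a test in the style of the
deleted tests/test_output_tokens.py could assert that the key is now absent
from the generation config. A hypothetical sketch, not part of this diff:

    from unittest.mock import Mock, patch

    import pytest


    @pytest.mark.asyncio
    @patch("google.generativeai.GenerativeModel")
    async def test_generation_config_has_no_token_cap(mock_model):
        """Hypothetical: handle_chat's generation_config carries no cap."""
        from server import handle_chat

        # Mock a minimal Gemini response, mirroring the deleted tests
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
        mock_model.return_value.generate_content.return_value = mock_response

        await handle_chat({"prompt": "Test question"})

        # The model must now be built without "max_output_tokens"
        config = mock_model.call_args[1]["generation_config"]
        assert "max_output_tokens" not in config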