refactor: remove MAX_OUTPUT_TOKENS configuration

Remove the hardcoded 32,768-token output limit so Gemini falls back to
its default/dynamic output token allocation. Responses are no longer
truncated at an artificial cap.

- Remove MAX_OUTPUT_TOKENS constant from config
- Remove max_tokens parameter from ToolRequest base model
- Clean up all references in server.py and tools/base.py
- Remove test_output_tokens.py as it's no longer needed
- Update imports to remove MAX_OUTPUT_TOKENS references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: Fahad
Date:   2025-06-09 08:15:45 +04:00
Parent: 9bcc78c430
Commit: e7dcc681d3

5 changed files with 4 additions and 180 deletions
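
The substance of the change is small: tools and the chat handler stop
passing a max_output_tokens cap when constructing the Gemini model. A
minimal before/after sketch, assuming the google-generativeai client
that the deleted tests below mock (the temperature value is
illustrative, not the repo's actual constant):

import google.generativeai as genai

# Before this commit: every model was created with a hard output cap.
model = genai.GenerativeModel(
    "gemini-2.5-pro-preview-06-05",
    generation_config={
        "temperature": 0.5,           # illustrative value
        "max_output_tokens": 32_768,  # the cap this commit removes
    },
)

# After this commit: the key is simply omitted, so Gemini falls back to
# its default/dynamic output token allocation.
model = genai.GenerativeModel(
    "gemini-2.5-pro-preview-06-05",
    generation_config={"temperature": 0.5},
)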


@@ -2,7 +2,7 @@
 Tests for configuration
 """
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS,
                     TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
                     TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
                     __updated__, __version__)
@@ -27,7 +27,6 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
-        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""

test_output_tokens.py (deleted)

@@ -1,165 +0,0 @@
-"""
-Tests for MAX_OUTPUT_TOKENS configuration
-"""
-from unittest.mock import Mock, patch
-
-import pytest
-
-from config import MAX_OUTPUT_TOKENS
-from tools.base import BaseTool, ToolRequest
-
-
-class TestMaxOutputTokens:
-    """Test that MAX_OUTPUT_TOKENS is properly applied"""
-
-    def test_max_output_tokens_value(self):
-        """Test the MAX_OUTPUT_TOKENS constant value"""
-        assert MAX_OUTPUT_TOKENS == 32_768
-
-    def test_tool_request_default_max_tokens(self):
-        """Test that ToolRequest has correct default max_tokens"""
-        request = ToolRequest()
-        assert request.max_tokens == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_base_tool_uses_max_output_tokens(self, mock_model):
-        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
-        # Create a concrete implementation of BaseTool for testing
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool
-        tool = TestTool()
-        await tool.execute({"test": "value"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_custom_max_tokens_override(self, mock_model):
-        """Test that custom max_tokens value overrides the default"""
-
-        class TestTool(BaseTool):
-            def get_name(self):
-                return "test_tool"
-
-            def get_description(self):
-                return "Test tool"
-
-            def get_input_schema(self):
-                return {
-                    "type": "object",
-                    "properties": {
-                        "test": {"type": "string"},
-                        "max_tokens": {"type": "integer"}
-                    },
-                    "required": ["test"]
-                }
-
-            def get_system_prompt(self):
-                return "Test prompt"
-
-            def get_request_model(self):
-                class TestRequest(ToolRequest):
-                    test: str
-
-                return TestRequest
-
-            async def prepare_prompt(self, request):
-                return f"Test: {request.test}"
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Execute tool with custom max_tokens
-        tool = TestTool()
-        custom_max_tokens = 16384
-        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
-
-        # Verify model was created with custom max_tokens
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == custom_max_tokens
-
-
-class TestServerMaxOutputTokens:
-    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""
-
-    @pytest.mark.asyncio
-    @patch("google.generativeai.GenerativeModel")
-    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
-        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
-        from server import handle_chat
-
-        # Mock response
-        mock_response = Mock()
-        mock_response.candidates = [Mock()]
-        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
-
-        mock_instance = Mock()
-        mock_instance.generate_content.return_value = mock_response
-        mock_model.return_value = mock_instance
-
-        # Call handle_chat
-        await handle_chat({"prompt": "Test question"})
-
-        # Verify model was created with MAX_OUTPUT_TOKENS
-        mock_model.assert_called_once()
-        call_args = mock_model.call_args
-
-        # Check generation_config
-        assert "generation_config" in call_args[1]
-        config = call_args[1]["generation_config"]
-        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
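
One trade-off of deleting the whole test module: nothing now asserts
that the cap stays gone. A hypothetical follow-up test (not part of
this commit) could reuse the mocking pattern above to check that the
generation_config passed to GenerativeModel no longer carries a cap:

from unittest.mock import Mock, patch

import pytest


@pytest.mark.asyncio
@patch("google.generativeai.GenerativeModel")
async def test_no_output_token_cap(mock_model):
    """Hypothetical regression test: no max_output_tokens is passed."""
    from server import handle_chat  # same entry point as the deleted test

    mock_response = Mock()
    mock_response.candidates = [Mock()]
    mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
    mock_model.return_value.generate_content.return_value = mock_response

    await handle_chat({"prompt": "Test question"})

    # A generation_config (e.g. temperature) may still be passed, but the
    # output cap should be absent so Gemini uses its default allocation.
    config = mock_model.call_args[1].get("generation_config", {})
    assert "max_output_tokens" not in config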