fix: increase output token limit to prevent response truncation
- Add MAX_OUTPUT_TOKENS constant set to 32,768 (Gemini 2.5 Pro's limit)
- Update all tools and chat handler to use MAX_OUTPUT_TOKENS
- Add comprehensive tests for output token configuration
- Update README with configuration details and system prompt docs

This fixes the issue where Gemini responses were being cut off at 8192 tokens,
causing Claude to repeatedly ask for the same analysis.

Fixes #1

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
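For context, a minimal sketch of how the new limit is presumably defined and threaded into model construction. config.py and tools/base.py are not part of this excerpt, so `create_model` and its signature are illustrative assumptions; only MAX_OUTPUT_TOKENS, the model name, and the `generation_config` shape are confirmed by the tests below.

```python
# Sketch only: assumed shape of the config constant and model factory,
# inferred from the assertions in tests/test_output_tokens.py.
import google.generativeai as genai

MAX_OUTPUT_TOKENS = 32_768  # Gemini 2.5 Pro's output ceiling; the old cap was 8192


def create_model(system_prompt: str, temperature: float,
                 max_tokens: int = MAX_OUTPUT_TOKENS):
    # The tests assert that GenerativeModel is called with a generation_config
    # carrying max_output_tokens, so the limit is passed as a parameter rather
    # than hard-coded per tool; callers can override it per request.
    return genai.GenerativeModel(
        model_name="gemini-2.5-pro-preview-06-05",
        generation_config={
            "temperature": temperature,
            "max_output_tokens": max_tokens,
        },
        system_instruction=system_prompt,
    )
```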
@@ -2,9 +2,10 @@
 Tests for configuration
 """
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL,
-                    TEMPERATURE_BALANCED, TEMPERATURE_CREATIVE, TOOL_TRIGGERS,
-                    __author__, __updated__, __version__)
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+                    TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
+                    TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
+                    __updated__, __version__)
 
 
 class TestConfig:
@@ -20,6 +21,7 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
+        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
tests/test_output_tokens.py (new file, 165 lines)
@@ -0,0 +1,165 @@
"""
Tests for MAX_OUTPUT_TOKENS configuration
"""

from unittest.mock import Mock, patch

import pytest

from config import MAX_OUTPUT_TOKENS
from tools.base import BaseTool, ToolRequest


class TestMaxOutputTokens:
    """Test that MAX_OUTPUT_TOKENS is properly applied"""

    def test_max_output_tokens_value(self):
        """Test the MAX_OUTPUT_TOKENS constant value"""
        assert MAX_OUTPUT_TOKENS == 32_768

    def test_tool_request_default_max_tokens(self):
        """Test that ToolRequest has correct default max_tokens"""
        request = ToolRequest()
        assert request.max_tokens == MAX_OUTPUT_TOKENS

    @pytest.mark.asyncio
    @patch("google.generativeai.GenerativeModel")
    async def test_base_tool_uses_max_output_tokens(self, mock_model):
        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""

        # Create a concrete implementation of BaseTool for testing
        class TestTool(BaseTool):
            def get_name(self):
                return "test_tool"

            def get_description(self):
                return "Test tool"

            def get_input_schema(self):
                return {
                    "type": "object",
                    "properties": {
                        "test": {"type": "string"}
                    },
                    "required": ["test"]
                }

            def get_system_prompt(self):
                return "Test prompt"

            def get_request_model(self):
                class TestRequest(ToolRequest):
                    test: str
                return TestRequest

            async def prepare_prompt(self, request):
                return f"Test: {request.test}"

        # Mock response
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [Mock(text="Test response")]

        mock_instance = Mock()
        mock_instance.generate_content.return_value = mock_response
        mock_model.return_value = mock_instance

        # Execute tool
        tool = TestTool()
        await tool.execute({"test": "value"})

        # Verify model was created with MAX_OUTPUT_TOKENS
        mock_model.assert_called_once()
        call_args = mock_model.call_args

        # Check generation_config
        assert "generation_config" in call_args[1]
        config = call_args[1]["generation_config"]
        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS

    @pytest.mark.asyncio
    @patch("google.generativeai.GenerativeModel")
    async def test_custom_max_tokens_override(self, mock_model):
        """Test that custom max_tokens value overrides the default"""

        class TestTool(BaseTool):
            def get_name(self):
                return "test_tool"

            def get_description(self):
                return "Test tool"

            def get_input_schema(self):
                return {
                    "type": "object",
                    "properties": {
                        "test": {"type": "string"},
                        "max_tokens": {"type": "integer"}
                    },
                    "required": ["test"]
                }

            def get_system_prompt(self):
                return "Test prompt"

            def get_request_model(self):
                class TestRequest(ToolRequest):
                    test: str
                return TestRequest

            async def prepare_prompt(self, request):
                return f"Test: {request.test}"

        # Mock response
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [Mock(text="Test response")]

        mock_instance = Mock()
        mock_instance.generate_content.return_value = mock_response
        mock_model.return_value = mock_instance

        # Execute tool with custom max_tokens
        tool = TestTool()
        custom_max_tokens = 16384
        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})

        # Verify model was created with custom max_tokens
        mock_model.assert_called_once()
        call_args = mock_model.call_args

        # Check generation_config
        assert "generation_config" in call_args[1]
        config = call_args[1]["generation_config"]
        assert config["max_output_tokens"] == custom_max_tokens


class TestServerMaxOutputTokens:
    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""

    @pytest.mark.asyncio
    @patch("google.generativeai.GenerativeModel")
    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
        from server import handle_chat

        # Mock response
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]

        mock_instance = Mock()
        mock_instance.generate_content.return_value = mock_response
        mock_model.return_value = mock_instance

        # Call handle_chat
        await handle_chat({"prompt": "Test question"})

        # Verify model was created with MAX_OUTPUT_TOKENS
        mock_model.assert_called_once()
        call_args = mock_model.call_args

        # Check generation_config
        assert "generation_config" in call_args[1]
        config = call_args[1]["generation_config"]
        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS