diff --git a/README.md b/README.md
index 4d0ad97..efa6b9b 100644
--- a/README.md
+++ b/README.md
@@ -262,6 +262,30 @@ Tools can reference files for additional context:
 "Get gemini to think deeper about my design, reference the current architecture.md"
 ```
 
+## Configuration
+
+The server includes several configurable properties that control its behavior:
+
+### Model Configuration
+- **`DEFAULT_MODEL`**: `"gemini-2.5-pro-preview-06-05"` - The default Gemini model used
+- **`MAX_CONTEXT_TOKENS`**: `1,000,000` - Maximum input context (1M tokens for Gemini 2.5 Pro)
+- **`MAX_OUTPUT_TOKENS`**: `32,768` - Maximum output tokens per response
+
+### Temperature Defaults
+Different tools use optimized temperature settings:
+- **`TEMPERATURE_ANALYTICAL`**: `0.2` - Used for code review and debugging (focused, deterministic)
+- **`TEMPERATURE_BALANCED`**: `0.5` - Used for general chat (balanced creativity/accuracy)
+- **`TEMPERATURE_CREATIVE`**: `0.7` - Used for deep thinking and architecture (more creative)
+
+### Customizing Output Length
+Each tool accepts an optional `max_tokens` parameter to override the default:
+```
+"Use gemini to analyze main.py with max_tokens 16000"
+"Get gemini to think deeper about this design with max_tokens 50000"
+```
+
+Note: The maximum supported output is 32,768 tokens for Gemini 2.5 Pro.
+
 ## Installation
 
 1. Clone the repository:
@@ -286,14 +310,37 @@ Tools can reference files for additional context:
 export GEMINI_API_KEY="your-api-key-here"
 ```
 
+## How System Prompts Work
+
+The server uses carefully crafted system prompts to give each tool specialized expertise:
+
+### Prompt Architecture
+- **Centralized Prompts**: All system prompts are defined in `prompts/tool_prompts.py`
+- **Tool Integration**: Each tool inherits from `BaseTool` and implements `get_system_prompt()`
+- **Prompt Flow**: `User Request → Tool Selection → System Prompt + Context → Gemini Response`
+
+### Specialized Expertise
+Each tool has a unique system prompt that defines its role and approach:
+- **`think_deeper`**: Acts as a senior development partner, challenging assumptions and finding edge cases
+- **`review_code`**: Expert code reviewer with a security/performance focus, uses severity levels
+- **`debug_issue`**: Systematic debugger providing root cause analysis and prevention strategies
+- **`analyze`**: Code analyst focusing on architecture, patterns, and actionable insights
+
+### Customization
+To modify tool behavior, you can:
+1. Edit prompts in `prompts/tool_prompts.py` for global changes
+2. Override `get_system_prompt()` in a tool class for tool-specific changes
+3. Use the `temperature` parameter to adjust response style (0.2 for focused, 0.7 for creative)
+
 ## Contributing
 
 We welcome contributions! The modular architecture makes it easy to add new tools:
 
 1. Create a new tool in `tools/`
 2. Inherit from `BaseTool`
-3. Implement required methods
-4. Add to `TOOLS` in `server.py`
+3. Implement required methods (including `get_system_prompt()`)
+4. Add your system prompt to `prompts/tool_prompts.py`
+5. Register your tool in the `TOOLS` dict in `server.py`
 
 See existing tools for examples.
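To make the contribution steps in the README concrete, here is a minimal sketch of what a hypothetical new tool following that flow could look like. Everything below is illustrative only and not part of this diff: `ExampleTool`, `ExampleRequest`, and the inline prompt string are placeholders, and the hook methods simply mirror the `BaseTool` interface exercised by the new tests in `tests/test_output_tokens.py`.

```python
# Hypothetical example -- not part of this PR. Names are placeholders.
from config import TEMPERATURE_ANALYTICAL
from tools.base import BaseTool, ToolRequest


class ExampleRequest(ToolRequest):
    """Request model for the placeholder tool (inherits max_tokens, temperature, etc.)."""

    code: str


class ExampleTool(BaseTool):
    """Placeholder tool showing the hook methods a contribution would implement."""

    def get_name(self):
        return "example_tool"

    def get_description(self):
        return "Explains what a snippet of code does"

    def get_input_schema(self):
        return {
            "type": "object",
            "properties": {"code": {"type": "string"}},
            "required": ["code"],
        }

    def get_system_prompt(self):
        # In a real contribution this string would live in prompts/tool_prompts.py
        return "You are an expert engineer. Explain the provided code clearly and concisely."

    def get_default_temperature(self):
        return TEMPERATURE_ANALYTICAL

    def get_request_model(self):
        return ExampleRequest

    async def prepare_prompt(self, request):
        return f"Explain the following code:\n\n{request.code}"
```

The final step would be registering an instance in the `TOOLS` dict in `server.py`, as the README steps above describe.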
diff --git a/config.py b/config.py
index b60218d..4ec7009 100644
--- a/config.py
+++ b/config.py
@@ -10,6 +10,7 @@ __author__ = "Fahad Gilani"
 # Model configuration
 DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
 MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
+MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
 
 # Temperature defaults for different tool types
 TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
diff --git a/server.py b/server.py
index 4dae804..2ac214d 100644
--- a/server.py
+++ b/server.py
@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
 from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
                     __version__)
 from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
 
@@ -160,7 +160,7 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
         model_name=DEFAULT_MODEL,
         generation_config={
             "temperature": temperature,
-            "max_output_tokens": 8192,
+            "max_output_tokens": MAX_OUTPUT_TOKENS,
             "candidate_count": 1,
         },
     )
diff --git a/tests/test_config.py b/tests/test_config.py
index 485e30d..fbcaa16 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,9 +2,10 @@
 Tests for configuration
 """
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL,
-                    TEMPERATURE_BALANCED, TEMPERATURE_CREATIVE, TOOL_TRIGGERS,
-                    __author__, __updated__, __version__)
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+                    TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
+                    TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
+                    __updated__, __version__)
 
 
 class TestConfig:
@@ -20,6 +21,7 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
+        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
diff --git a/tests/test_output_tokens.py b/tests/test_output_tokens.py
new file mode 100644
index 0000000..484b949
--- /dev/null
+++ b/tests/test_output_tokens.py
@@ -0,0 +1,165 @@
+"""
+Tests for MAX_OUTPUT_TOKENS configuration
+"""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from config import MAX_OUTPUT_TOKENS
+from tools.base import BaseTool, ToolRequest
+
+
+class TestMaxOutputTokens:
+    """Test that MAX_OUTPUT_TOKENS is properly applied"""
+
+    def test_max_output_tokens_value(self):
+        """Test the MAX_OUTPUT_TOKENS constant value"""
+        assert MAX_OUTPUT_TOKENS == 32_768
+
+    def test_tool_request_default_max_tokens(self):
+        """Test that ToolRequest has correct default max_tokens"""
+        request = ToolRequest()
+        assert request.max_tokens == MAX_OUTPUT_TOKENS
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_base_tool_uses_max_output_tokens(self, mock_model):
+        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
+
+        # Create a concrete implementation of BaseTool for testing
+        class TestTool(BaseTool):
+            def get_name(self):
+                return "test_tool"
+
+            def get_description(self):
+                return "Test tool"
+
+            def get_input_schema(self):
+                return {
+                    "type": "object",
+                    "properties": {
+                        "test": {"type": "string"}
+                    },
+                    "required": ["test"]
+                }
+
+            def get_system_prompt(self):
+                return "Test prompt"
+
+            def get_request_model(self):
+                class TestRequest(ToolRequest):
+                    test: str
+                return TestRequest
+
+            async def prepare_prompt(self, request):
+                return f"Test: {request.test}"
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Execute tool
+        tool = TestTool()
+        await tool.execute({"test": "value"})
+
+        # Verify model was created with MAX_OUTPUT_TOKENS
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_custom_max_tokens_override(self, mock_model):
+        """Test that custom max_tokens value overrides the default"""
+
+        class TestTool(BaseTool):
+            def get_name(self):
+                return "test_tool"
+
+            def get_description(self):
+                return "Test tool"
+
+            def get_input_schema(self):
+                return {
+                    "type": "object",
+                    "properties": {
+                        "test": {"type": "string"},
+                        "max_tokens": {"type": "integer"}
+                    },
+                    "required": ["test"]
+                }
+
+            def get_system_prompt(self):
+                return "Test prompt"
+
+            def get_request_model(self):
+                class TestRequest(ToolRequest):
+                    test: str
+                return TestRequest
+
+            async def prepare_prompt(self, request):
+                return f"Test: {request.test}"
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Execute tool with custom max_tokens
+        tool = TestTool()
+        custom_max_tokens = 16384
+        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
+
+        # Verify model was created with custom max_tokens
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == custom_max_tokens
+
+
+class TestServerMaxOutputTokens:
+    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
+        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
+        from server import handle_chat
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Call handle_chat
+        await handle_chat({"prompt": "Test question"})
+
+        # Verify model was created with MAX_OUTPUT_TOKENS
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
diff --git a/tools/base.py b/tools/base.py
index 54e1c70..8ad5fe0 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -9,6 +9,8 @@
 import google.generativeai as genai
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
+from config import MAX_OUTPUT_TOKENS
+
 
 class ToolRequest(BaseModel):
     """Base request model for all tools"""
@@ -17,7 +19,7 @@ class ToolRequest(BaseModel):
         None, description="Model to use (defaults to Gemini 2.5 Pro)"
     )
     max_tokens: Optional[int] = Field(
-        8192, description="Maximum number of tokens in response"
+        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
    )
    temperature: Optional[float] = Field(
        None, description="Temperature for response (tool-specific defaults)"
@@ -78,7 +80,7 @@ class BaseTool(ABC):
        temperature = getattr(request, "temperature", None)
        if temperature is None:
            temperature = self.get_default_temperature()
-       max_tokens = getattr(request, "max_tokens", 8192)
+       max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)
 
        # Create and configure model
        model = self.create_model(model_name, temperature, max_tokens)
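The updated `BaseTool.execute` passes `max_tokens` into `self.create_model`, but `create_model` itself is not shown in this diff. Judging by the `generation_config` block that `handle_chat` uses in `server.py`, it presumably builds the Gemini model along these lines; this is only a sketch under that assumption, not the actual implementation:

```python
import google.generativeai as genai


def create_model(self, model_name: str, temperature: float, max_tokens: int):
    """Sketch of how the per-tool model is presumably constructed.

    Mirrors the generation_config used by handle_chat in server.py; the real
    create_model in tools/base.py may differ in its details.
    """
    return genai.GenerativeModel(
        model_name=model_name,
        generation_config={
            "temperature": temperature,
            # MAX_OUTPUT_TOKENS by default, or the per-request max_tokens override
            "max_output_tokens": max_tokens,
            "candidate_count": 1,
        },
    )
```

This is also what the new tests assert: they patch `google.generativeai.GenerativeModel` and check that `generation_config["max_output_tokens"]` equals `MAX_OUTPUT_TOKENS` by default, or the caller-supplied `max_tokens` when one is provided.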