diff --git a/README.md b/README.md
index 4d0ad97..efa6b9b 100644
--- a/README.md
+++ b/README.md
@@ -262,6 +262,30 @@ Tools can reference files for additional context:
 "Get gemini to think deeper about my design, reference the current architecture.md"
 ```
 
+## Configuration
+
+The server includes several configurable properties that control its behavior:
+
+### Model Configuration
+- **`DEFAULT_MODEL`**: `"gemini-2.5-pro-preview-06-05"` - The default Gemini model used
+- **`MAX_CONTEXT_TOKENS`**: `1,000,000` - Maximum input context (1M tokens for Gemini 2.5 Pro)
+- **`MAX_OUTPUT_TOKENS`**: `32,768` - Maximum output tokens per response
+
+### Temperature Defaults
+Different tools use optimized temperature settings:
+- **`TEMPERATURE_ANALYTICAL`**: `0.2` - Used for code review and debugging (focused, deterministic)
+- **`TEMPERATURE_BALANCED`**: `0.5` - Used for general chat (balanced creativity/accuracy)
+- **`TEMPERATURE_CREATIVE`**: `0.7` - Used for deep thinking and architecture (more creative)
+
+### Customizing Output Length
+Each tool accepts an optional `max_tokens` parameter to override the default:
+```
+"Use gemini to analyze main.py with max_tokens 16000"
+"Get gemini to think deeper about this design with max_tokens 50000"
+```
+
+Note: The maximum supported output is 32,768 tokens for Gemini 2.5 Pro.
+
 ## Installation
 
 1. Clone the repository:
@@ -286,14 +310,37 @@ Tools can reference files for additional context:
 export GEMINI_API_KEY="your-api-key-here"
 ```
 
+## How System Prompts Work
+
+The server uses carefully crafted system prompts to give each tool specialized expertise:
+
+### Prompt Architecture
+- **Centralized Prompts**: All system prompts are defined in `prompts/tool_prompts.py`
+- **Tool Integration**: Each tool inherits from `BaseTool` and implements `get_system_prompt()`
+- **Prompt Flow**: `User Request → Tool Selection → System Prompt + Context → Gemini Response`
+
+### Specialized Expertise
+Each tool has a unique system prompt that defines its role and approach:
+- **`think_deeper`**: Acts as a senior development partner, challenging assumptions and finding edge cases
+- **`review_code`**: Expert code reviewer with a security/performance focus, uses severity levels
+- **`debug_issue`**: Systematic debugger providing root cause analysis and prevention strategies
+- **`analyze`**: Code analyst focusing on architecture, patterns, and actionable insights
+
+### Customization
+To modify tool behavior, you can:
+1. Edit prompts in `prompts/tool_prompts.py` for global changes
+2. Override `get_system_prompt()` in a tool class for tool-specific changes
+3. Use the `temperature` parameter to adjust response style (0.2 for focused, 0.7 for creative)
+
 ## Contributing
 
 We welcome contributions! The modular architecture makes it easy to add new tools:
 
 1. Create a new tool in `tools/`
 2. Inherit from `BaseTool`
-3. Implement required methods
-4. Add to `TOOLS` in `server.py`
+3. Implement required methods (including `get_system_prompt()`)
+4. Add your system prompt to `prompts/tool_prompts.py`
+5. Register your tool in the `TOOLS` dict in `server.py`
 
 See existing tools for examples.
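To make the contribution steps in the README concrete, here is a minimal sketch of what a hypothetical new tool following that flow could look like. Everything below is illustrative only and not part of this diff: `ExampleTool`, `ExampleRequest`, and the inline prompt string are placeholders, and the hook methods simply mirror the `BaseTool` interface exercised by the new tests in `tests/test_output_tokens.py`.

```python
# Hypothetical example -- not part of this PR. Names are placeholders.
from config import TEMPERATURE_ANALYTICAL
from tools.base import BaseTool, ToolRequest


class ExampleRequest(ToolRequest):
    """Request model for the placeholder tool (inherits max_tokens, temperature, etc.)."""

    code: str


class ExampleTool(BaseTool):
    """Placeholder tool showing the hook methods a contribution would implement."""

    def get_name(self):
        return "example_tool"

    def get_description(self):
        return "Explains what a snippet of code does"

    def get_input_schema(self):
        return {
            "type": "object",
            "properties": {"code": {"type": "string"}},
            "required": ["code"],
        }

    def get_system_prompt(self):
        # In a real contribution this string would live in prompts/tool_prompts.py
        return "You are an expert engineer. Explain the provided code clearly and concisely."

    def get_default_temperature(self):
        return TEMPERATURE_ANALYTICAL

    def get_request_model(self):
        return ExampleRequest

    async def prepare_prompt(self, request):
        return f"Explain the following code:\n\n{request.code}"
```

The final step would be registering an instance in the `TOOLS` dict in `server.py`, as the README steps above describe.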
diff --git a/config.py b/config.py
index b60218d..4ec7009 100644
--- a/config.py
+++ b/config.py
@@ -10,6 +10,7 @@ __author__ = "Fahad Gilani"
 # Model configuration
 DEFAULT_MODEL = "gemini-2.5-pro-preview-06-05"
 MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
+MAX_OUTPUT_TOKENS = 32_768  # Maximum output tokens for Gemini 2.5 Pro
 
 # Temperature defaults for different tool types
 TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
diff --git a/server.py b/server.py
index 4dae804..2ac214d 100644
--- a/server.py
+++ b/server.py
@@ -15,7 +15,7 @@ from mcp.server.models import InitializationOptions
 from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__,
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS, __author__, __updated__,
                     __version__)
 from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool
 
@@ -160,7 +160,7 @@ async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]:
         model_name=DEFAULT_MODEL,
         generation_config={
             "temperature": temperature,
-            "max_output_tokens": 8192,
+            "max_output_tokens": MAX_OUTPUT_TOKENS,
             "candidate_count": 1,
         },
     )
diff --git a/tests/test_config.py b/tests/test_config.py
index 485e30d..fbcaa16 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,9 +2,10 @@
 Tests for configuration
 """
 
-from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL,
-                    TEMPERATURE_BALANCED, TEMPERATURE_CREATIVE, TOOL_TRIGGERS,
-                    __author__, __updated__, __version__)
+from config import (DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MAX_OUTPUT_TOKENS,
+                    TEMPERATURE_ANALYTICAL, TEMPERATURE_BALANCED,
+                    TEMPERATURE_CREATIVE, TOOL_TRIGGERS, __author__,
+                    __updated__, __version__)
 
 
 class TestConfig:
@@ -20,6 +21,7 @@ class TestConfig:
         """Test model configuration"""
         assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
+        assert MAX_OUTPUT_TOKENS == 32_768
 
     def test_temperature_defaults(self):
         """Test temperature constants"""
diff --git a/tests/test_output_tokens.py b/tests/test_output_tokens.py
new file mode 100644
index 0000000..484b949
--- /dev/null
+++ b/tests/test_output_tokens.py
@@ -0,0 +1,165 @@
+"""
+Tests for MAX_OUTPUT_TOKENS configuration
+"""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from config import MAX_OUTPUT_TOKENS
+from tools.base import BaseTool, ToolRequest
+
+
+class TestMaxOutputTokens:
+    """Test that MAX_OUTPUT_TOKENS is properly applied"""
+
+    def test_max_output_tokens_value(self):
+        """Test the MAX_OUTPUT_TOKENS constant value"""
+        assert MAX_OUTPUT_TOKENS == 32_768
+
+    def test_tool_request_default_max_tokens(self):
+        """Test that ToolRequest has correct default max_tokens"""
+        request = ToolRequest()
+        assert request.max_tokens == MAX_OUTPUT_TOKENS
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_base_tool_uses_max_output_tokens(self, mock_model):
+        """Test that BaseTool properly uses MAX_OUTPUT_TOKENS in model creation"""
+
+        # Create a concrete implementation of BaseTool for testing
+        class TestTool(BaseTool):
+            def get_name(self):
+                return "test_tool"
+
+            def get_description(self):
+                return "Test tool"
+
+            def get_input_schema(self):
+                return {
+                    "type": "object",
+                    "properties": {
+                        "test": {"type": "string"}
+                    },
+                    "required": ["test"]
+                }
+
+            def get_system_prompt(self):
+                return "Test prompt"
+
+            def get_request_model(self):
+                class TestRequest(ToolRequest):
+                    test: str
+                return TestRequest
+
+            async def prepare_prompt(self, request):
+                return f"Test: {request.test}"
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Execute tool
+        tool = TestTool()
+        await tool.execute({"test": "value"})
+
+        # Verify model was created with MAX_OUTPUT_TOKENS
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_custom_max_tokens_override(self, mock_model):
+        """Test that custom max_tokens value overrides the default"""
+
+        class TestTool(BaseTool):
+            def get_name(self):
+                return "test_tool"
+
+            def get_description(self):
+                return "Test tool"
+
+            def get_input_schema(self):
+                return {
+                    "type": "object",
+                    "properties": {
+                        "test": {"type": "string"},
+                        "max_tokens": {"type": "integer"}
+                    },
+                    "required": ["test"]
+                }
+
+            def get_system_prompt(self):
+                return "Test prompt"
+
+            def get_request_model(self):
+                class TestRequest(ToolRequest):
+                    test: str
+                return TestRequest
+
+            async def prepare_prompt(self, request):
+                return f"Test: {request.test}"
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Test response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Execute tool with custom max_tokens
+        tool = TestTool()
+        custom_max_tokens = 16384
+        await tool.execute({"test": "value", "max_tokens": custom_max_tokens})
+
+        # Verify model was created with custom max_tokens
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == custom_max_tokens
+
+
+class TestServerMaxOutputTokens:
+    """Test that server.py properly uses MAX_OUTPUT_TOKENS"""
+
+    @pytest.mark.asyncio
+    @patch("google.generativeai.GenerativeModel")
+    async def test_handle_chat_uses_max_output_tokens(self, mock_model):
+        """Test that handle_chat uses MAX_OUTPUT_TOKENS"""
+        from server import handle_chat
+
+        # Mock response
+        mock_response = Mock()
+        mock_response.candidates = [Mock()]
+        mock_response.candidates[0].content.parts = [Mock(text="Chat response")]
+
+        mock_instance = Mock()
+        mock_instance.generate_content.return_value = mock_response
+        mock_model.return_value = mock_instance
+
+        # Call handle_chat
+        await handle_chat({"prompt": "Test question"})
+
+        # Verify model was created with MAX_OUTPUT_TOKENS
+        mock_model.assert_called_once()
+        call_args = mock_model.call_args
+
+        # Check generation_config
+        assert "generation_config" in call_args[1]
+        config = call_args[1]["generation_config"]
+        assert config["max_output_tokens"] == MAX_OUTPUT_TOKENS
diff --git a/tools/base.py b/tools/base.py
index 54e1c70..8ad5fe0 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -9,6 +9,8 @@
 import google.generativeai as genai
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
+from config import MAX_OUTPUT_TOKENS
+
 
 class ToolRequest(BaseModel):
     """Base request model for all tools"""
@@ -17,7 +19,7 @@ class ToolRequest(BaseModel):
         None, description="Model to use (defaults to Gemini 2.5 Pro)"
     )
     max_tokens: Optional[int] = Field(
-        8192, description="Maximum number of tokens in response"
+        MAX_OUTPUT_TOKENS, description="Maximum number of tokens in response"
    )
    temperature: Optional[float] = Field(
        None, description="Temperature for response (tool-specific defaults)"
@@ -78,7 +80,7 @@ class BaseTool(ABC):
        temperature = getattr(request, "temperature", None)
        if temperature is None:
            temperature = self.get_default_temperature()
-       max_tokens = getattr(request, "max_tokens", 8192)
+       max_tokens = getattr(request, "max_tokens", MAX_OUTPUT_TOKENS)
 
        # Create and configure model
        model = self.create_model(model_name, temperature, max_tokens)
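The updated `BaseTool.execute` passes `max_tokens` into `self.create_model`, but `create_model` itself is not shown in this diff. Judging by the `generation_config` block that `handle_chat` uses in `server.py`, it presumably builds the Gemini model along these lines; this is only a sketch under that assumption, not the actual implementation:

```python
import google.generativeai as genai


def create_model(self, model_name: str, temperature: float, max_tokens: int):
    """Sketch of how the per-tool model is presumably constructed.

    Mirrors the generation_config used by handle_chat in server.py; the real
    create_model in tools/base.py may differ in its details.
    """
    return genai.GenerativeModel(
        model_name=model_name,
        generation_config={
            "temperature": temperature,
            # MAX_OUTPUT_TOKENS by default, or the per-request max_tokens override
            "max_output_tokens": max_tokens,
            "candidate_count": 1,
        },
    )
```

This is also what the new tests assert: they patch `google.generativeai.GenerativeModel` and check that `generation_config["max_output_tokens"]` equals `MAX_OUTPUT_TOKENS` by default, or the caller-supplied `max_tokens` when one is provided.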