feat: Add comprehensive dynamic configuration system v3.3.0
## Major Features Added ### 🎯 Dynamic Configuration System - **Environment-aware model selection**: DEFAULT_MODEL with 'pro'/'flash' shortcuts - **Configurable thinking modes**: DEFAULT_THINKING_MODE_THINKDEEP for extended reasoning - **All tool schemas now dynamic**: Show actual current defaults instead of hardcoded values - **Enhanced setup workflow**: Copy from .env.example with smart customization ### 🔧 Model & Thinking Configuration - **Smart model resolution**: Support both shortcuts ('pro', 'flash') and full model names - **Thinking mode optimization**: Only apply thinking budget to models that support it - **Flash model compatibility**: Works without thinking config, still beneficial via system prompts - **Dynamic schema descriptions**: Tool parameters show current environment values ### 🚀 Enhanced Developer Experience - **Fail-fast Docker setup**: GEMINI_API_KEY required upfront in docker-compose - **Comprehensive startup logging**: Shows current model and thinking mode defaults - **Enhanced get_version tool**: Reports all dynamic configuration values - **Better .env documentation**: Clear token consumption details and model options ### 🧪 Comprehensive Testing - **Live model validation**: New simulator test validates Pro vs Flash thinking behavior - **Dynamic configuration tests**: Verify environment variable overrides work correctly - **Complete test coverage**: All 139 unit tests pass, including new model config tests ### 📋 Configuration Files Updated - **docker-compose.yml**: Fail-fast API key validation, thinking mode support - **setup-docker.sh**: Copy from .env.example instead of manual creation - **.env.example**: Detailed documentation with token consumption per thinking mode - **.gitignore**: Added test-setup/ for cleanup ### 🛠 Technical Improvements - **Removed setup.py**: Fully Docker-based deployment (no longer needed) - **REDIS_URL smart defaults**: Auto-configured for Docker, still configurable for dev - **All tools updated**: Consistent dynamic model parameter descriptions - **Enhanced error handling**: Better model resolution and validation ## Breaking Changes - Removed setup.py (Docker-only deployment) - Model parameter descriptions now show actual defaults (dynamic) ## Migration Guide - Update .env files using new .env.example format - Use 'pro'/'flash' shortcuts or full model names - Set DEFAULT_THINKING_MODE_THINKDEEP for custom thinking depth 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ from .test_content_validation import ContentValidationTest
|
||||
from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
|
||||
from .test_cross_tool_continuation import CrossToolContinuationTest
|
||||
from .test_logs_validation import LogsValidationTest
|
||||
from .test_model_thinking_config import TestModelThinkingConfig
|
||||
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
||||
from .test_redis_validation import RedisValidationTest
|
||||
|
||||
@@ -23,6 +24,7 @@ TEST_REGISTRY = {
|
||||
"cross_tool_comprehensive": CrossToolComprehensiveTest,
|
||||
"logs_validation": LogsValidationTest,
|
||||
"redis_validation": RedisValidationTest,
|
||||
"model_thinking_config": TestModelThinkingConfig,
|
||||
}
|
||||
|
||||
__all__ = [
|
||||
@@ -34,5 +36,6 @@ __all__ = [
|
||||
"CrossToolComprehensiveTest",
|
||||
"LogsValidationTest",
|
||||
"RedisValidationTest",
|
||||
"TestModelThinkingConfig",
|
||||
"TEST_REGISTRY",
|
||||
]
|
||||
|
||||
177
simulator_tests/test_model_thinking_config.py
Normal file
177
simulator_tests/test_model_thinking_config.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Model Thinking Configuration Test
|
||||
|
||||
Tests that thinking configuration is properly applied only to models that support it,
|
||||
and that Flash models work correctly without thinking config.
|
||||
"""
|
||||
|
||||
from .base_test import BaseSimulatorTest
|
||||
|
||||
|
||||
class TestModelThinkingConfig(BaseSimulatorTest):
|
||||
"""Test model-specific thinking configuration behavior"""
|
||||
|
||||
@property
|
||||
def test_name(self) -> str:
|
||||
return "model_thinking_config"
|
||||
|
||||
@property
|
||||
def test_description(self) -> str:
|
||||
return "Model-specific thinking configuration behavior"
|
||||
|
||||
def test_pro_model_with_thinking_config(self):
|
||||
"""Test that Pro model uses thinking configuration"""
|
||||
self.logger.info("Testing Pro model with thinking configuration...")
|
||||
|
||||
try:
|
||||
# Test with explicit pro model and high thinking mode
|
||||
response, continuation_id = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
"prompt": "What is 2 + 2? Please think carefully and explain.",
|
||||
"model": "pro", # Should resolve to gemini-2.5-pro-preview-06-05
|
||||
"thinking_mode": "high", # Should use thinking_config
|
||||
},
|
||||
)
|
||||
|
||||
if not response:
|
||||
raise Exception("Pro model test failed: No response received")
|
||||
|
||||
self.logger.info("✅ Pro model with thinking config works correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Pro model test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_flash_model_without_thinking_config(self):
|
||||
"""Test that Flash model works without thinking configuration"""
|
||||
self.logger.info("Testing Flash model without thinking configuration...")
|
||||
|
||||
try:
|
||||
# Test with explicit flash model and thinking mode (should be ignored)
|
||||
response, continuation_id = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
"prompt": "What is 3 + 3? Give a quick answer.",
|
||||
"model": "flash", # Should resolve to gemini-2.0-flash-exp
|
||||
"thinking_mode": "high", # Should be ignored for Flash model
|
||||
},
|
||||
)
|
||||
|
||||
if not response:
|
||||
raise Exception("Flash model test failed: No response received")
|
||||
|
||||
self.logger.info("✅ Flash model without thinking config works correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
if "thinking" in str(e).lower() and ("not supported" in str(e).lower() or "invalid" in str(e).lower()):
|
||||
raise Exception(f"Flash model incorrectly tried to use thinking config: {e}")
|
||||
self.logger.error(f"❌ Flash model test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_model_resolution_logic(self):
|
||||
"""Test that model resolution works correctly for both shortcuts and full names"""
|
||||
self.logger.info("Testing model resolution logic...")
|
||||
|
||||
test_cases = [
|
||||
("pro", "should work with Pro model"),
|
||||
("flash", "should work with Flash model"),
|
||||
("gemini-2.5-pro-preview-06-05", "should work with full Pro model name"),
|
||||
("gemini-2.0-flash-exp", "should work with full Flash model name"),
|
||||
]
|
||||
|
||||
success_count = 0
|
||||
|
||||
for model_name, description in test_cases:
|
||||
try:
|
||||
response, continuation_id = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
"prompt": f"Test with {model_name}: What is 1 + 1?",
|
||||
"model": model_name,
|
||||
"thinking_mode": "medium",
|
||||
},
|
||||
)
|
||||
|
||||
if not response:
|
||||
raise Exception(f"No response received for model {model_name}")
|
||||
|
||||
self.logger.info(f"✅ {model_name} {description}")
|
||||
success_count += 1
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ {model_name} failed: {e}")
|
||||
return False
|
||||
|
||||
return success_count == len(test_cases)
|
||||
|
||||
def test_default_model_behavior(self):
|
||||
"""Test behavior with server default model (no explicit model specified)"""
|
||||
self.logger.info("Testing default model behavior...")
|
||||
|
||||
try:
|
||||
# Test without specifying model (should use server default)
|
||||
response, continuation_id = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
"prompt": "Test default model: What is 4 + 4?",
|
||||
# No model specified - should use DEFAULT_MODEL from config
|
||||
"thinking_mode": "medium",
|
||||
},
|
||||
)
|
||||
|
||||
if not response:
|
||||
raise Exception("Default model test failed: No response received")
|
||||
|
||||
self.logger.info("✅ Default model behavior works correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Default model test failed: {e}")
|
||||
return False
|
||||
|
||||
def run_test(self) -> bool:
|
||||
"""Run all model thinking configuration tests"""
|
||||
self.logger.info(f"📝 Test: {self.test_description}")
|
||||
|
||||
try:
|
||||
# Test Pro model with thinking config
|
||||
if not self.test_pro_model_with_thinking_config():
|
||||
return False
|
||||
|
||||
# Test Flash model without thinking config
|
||||
if not self.test_flash_model_without_thinking_config():
|
||||
return False
|
||||
|
||||
# Test model resolution logic
|
||||
if not self.test_model_resolution_logic():
|
||||
return False
|
||||
|
||||
# Test default model behavior
|
||||
if not self.test_default_model_behavior():
|
||||
return False
|
||||
|
||||
self.logger.info(f"✅ All {self.test_name} tests passed!")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ {self.test_name} test failed: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
"""Run the model thinking configuration tests"""
|
||||
import sys
|
||||
|
||||
verbose = "--verbose" in sys.argv or "-v" in sys.argv
|
||||
test = TestModelThinkingConfig(verbose=verbose)
|
||||
|
||||
success = test.run_test()
|
||||
sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user