#!/usr/bin/env python3
"""
Test script for the enhanced consensus tool with ModelConfig objects
"""

import asyncio
import json
import sys
import traceback

from tools.consensus import ConsensusTool


async def test_enhanced_consensus():
    """Test the enhanced consensus tool with custom stance prompts"""
    print("🧪 Testing Enhanced Consensus Tool")
    print("=" * 50)

    # Create the consensus tool instance (shared by both checks below)
    tool = ConsensusTool()

    # Check that all stance synonyms normalize as expected
    print("📝 Testing stance synonym normalization...")
    test_synonyms = [
        ("support", "for"),
        ("favor", "for"),
        ("oppose", "against"),
        ("critical", "against"),
        ("neutral", "neutral"),
        ("for", "for"),
        ("against", "against"),
        # Unknown stances should default to neutral
        ("maybe", "neutral"),
        ("supportive", "neutral"),
        ("random", "neutral"),
    ]

    for input_stance, expected in test_synonyms:
        normalized = tool._normalize_stance(input_stance)
        status = "✅" if normalized == expected else "❌"
        print(f"{status} '{input_stance}' → '{normalized}' (expected: '{expected}')")

    print()

    # Test arguments using the new ModelConfig format
    test_arguments = {
        "prompt": "Should we add a pizza ordering button to our enterprise software?",
        "models": [
            {
                "model": "flash",
                "stance": "support",  # Synonym; should normalize to "for"
                "stance_prompt": (
                    "You are a user experience advocate. Focus on how this feature "
                    "could improve user engagement and satisfaction. Consider the "
                    "human elements - how might this bring joy to users' workday? "
                    "Think about unexpected benefits and creative use cases."
                ),
            },
            {
                "model": "flash",
                "stance": "oppose",  # Synonym; should normalize to "against"
                "stance_prompt": (
                    "You are a software architecture specialist. Focus on technical "
                    "concerns: code maintainability, security implications, scope "
                    "creep, and system complexity. Consider long-term costs and "
                    "potential maintenance burden."
                ),
            },
        ],
        "focus_areas": ["user experience", "technical complexity", "business value"],
        "temperature": 0.3,
    }

    try:
        print("📝 Test Arguments:")
        print(json.dumps(test_arguments, indent=2))
        print()

        print("🚀 Executing consensus tool...")
        result = await tool.execute(test_arguments)

        print("✅ Consensus tool execution completed!")
        print()

        # Parse and display results
        if result and len(result) > 0:
            response_text = result[0].text

            try:
                response_data = json.loads(response_text)

                print("📊 Consensus Results:")
                print(f"Status: {response_data.get('status', 'unknown')}")

                if response_data.get("status") == "consensus_success":
                    models_used = response_data.get("models_used", [])
                    print(f"Models used: {', '.join(models_used)}")

                    responses = response_data.get("responses", [])
                    print(f"\n🎭 Individual Model Responses ({len(responses)} total):")

                    for i, resp in enumerate(responses, 1):
                        model = resp.get("model", "unknown")
                        stance = resp.get("stance", "neutral")
                        status = resp.get("status", "unknown")

                        print(f"\n{i}. {model.upper()} ({stance} stance) - {status}")

                        if status == "success":
                            verdict = resp.get("verdict", "No verdict")
                            custom_prompt = resp.get("metadata", {}).get("custom_stance_prompt", False)
                            print(f"   Custom prompt used: {'Yes' if custom_prompt else 'No'}")
                            print(f"   Verdict preview: {verdict[:200]}...")

                            # The request used synonyms ("support"/"oppose"), so a
                            # correctly normalized response reports the canonical form
                            if stance in ("for", "against"):
                                print(f"   ✅ Stance reported in canonical form: '{stance}'")
                        else:
                            error = resp.get("error", "Unknown error")
                            print(f"   Error: {error}")
                else:
                    print(f"❌ Consensus failed: {response_data.get('error', 'Unknown error')}")

            except json.JSONDecodeError:
                print("📄 Raw response (not JSON):")
                print(response_text[:500] + "..." if len(response_text) > 500 else response_text)
        else:
            print("❌ No response received from consensus tool")

    except Exception as e:
        print(f"❌ Test failed with exception: {e}")
        traceback.print_exc()
        return False

    print("\n🎉 Enhanced consensus tool test completed!")
    return True
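# For reference, a minimal sketch of the normalization behavior the test above
# expects. This is an illustration only, not the real implementation, which
# lives in tools.consensus.ConsensusTool._normalize_stance and may differ.
_SKETCH_STANCE_SYNONYMS = {
    "support": "for",
    "favor": "for",
    "oppose": "against",
    "critical": "against",
}


def _sketch_normalize_stance(stance: str) -> str:
    """Canonical stances pass through, known synonyms map to their
    canonical form, and anything unrecognized falls back to 'neutral'."""
    if stance in ("for", "against", "neutral"):
        return stance
    return _SKETCH_STANCE_SYNONYMS.get(stance, "neutral")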
if __name__ == "__main__":
    # Run the test
    success = asyncio.run(test_enhanced_consensus())
    sys.exit(0 if success else 1)
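# For reference, the response shape the parsing in test_enhanced_consensus()
# assumes. This is inferred from the test itself, not an authoritative schema;
# see tools.consensus for the source of truth.
#
# {
#     "status": "consensus_success",
#     "models_used": ["flash", "flash"],
#     "responses": [
#         {
#             "model": "flash",
#             "stance": "for",
#             "status": "success",
#             "verdict": "...",
#             "metadata": {"custom_stance_prompt": true}
#         },
#         ...
#     ]
# }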