* WIP

Refactor resolving model names; this should be done once at the MCP call boundary
Pass around model context instead
Consensus tool allows one to get a consensus from multiple models, optionally assigning one a 'for' or 'against' stance to find nuanced responses.

* Deduplication of model resolution; model_context should be available before reaching deeper parts of the code
Improved abstraction when building conversations
Throw programmer errors early

* Guardrails
Support for `model:option` format at MCP boundary so future tools can use additional options if needed instead of handling this only for consensus
Model name now supports an optional ":option" for future use

* Simplified async flow

* Improved model for request to support natural language
Simplified async flow

* Improved model for request to support natural language
Simplified async flow

* Fix consensus tool async/sync patterns to match codebase standards

CRITICAL FIXES:
- Converted _get_consensus_responses from async to sync (matches other tools)
- Converted store_conversation_turn from async to sync (add_turn is synchronous)
- Removed unnecessary asyncio imports and sleep calls
- Fixed ClosedResourceError in MCP protocol during long consensus operations

PATTERN ALIGNMENT:
- Consensus tool now follows same sync patterns as all other tools
- Only execute() and prepare_prompt() are async (base class requirement)
- All internal operations are synchronous like analyze, chat, debug, etc.

TESTING:
- MCP simulation test now passes: consensus_stance ✅
- Two-model consensus works correctly in ~35 seconds
- Unknown stance handling defaults to neutral with warnings
- All 9 unit tests pass (100% success rate)

The consensus tool async patterns were anomalous in the codebase. This fix aligns it with the established synchronous patterns used by all other tools while maintaining full functionality.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed call order and added new test

* Cleanup dead comments
Docs for the new tool
Improved tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
87 lines
3.6 KiB
Python
87 lines
3.6 KiB
Python
"""
Communication Simulator Tests Package

This package contains individual test modules for the Zen MCP Communication Simulator.
Each test is in its own file for better organization and maintainability.
"""
|
|
|
|
from .base_test import BaseSimulatorTest
|
|
from .test_basic_conversation import BasicConversationTest
|
|
from .test_consensus_conversation import TestConsensusConversation
|
|
from .test_consensus_stance import TestConsensusStance
|
|
from .test_consensus_three_models import TestConsensusThreeModels
|
|
from .test_content_validation import ContentValidationTest
|
|
from .test_conversation_chain_validation import ConversationChainValidationTest
|
|
from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
|
|
from .test_cross_tool_continuation import CrossToolContinuationTest
|
|
from .test_line_number_validation import LineNumberValidationTest
|
|
from .test_logs_validation import LogsValidationTest
|
|
from .test_model_thinking_config import TestModelThinkingConfig
|
|
from .test_o3_model_selection import O3ModelSelectionTest
|
|
from .test_o3_pro_expensive import O3ProExpensiveTest
|
|
from .test_ollama_custom_url import OllamaCustomUrlTest
|
|
from .test_openrouter_fallback import OpenRouterFallbackTest
|
|
from .test_openrouter_models import OpenRouterModelsTest
|
|
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
|
from .test_redis_validation import RedisValidationTest
|
|
from .test_refactor_validation import RefactorValidationTest
|
|
from .test_testgen_validation import TestGenValidationTest
|
|
from .test_token_allocation_validation import TokenAllocationValidationTest
|
|
from .test_vision_capability import VisionCapabilityTest
|
|
from .test_xai_models import XAIModelsTest
|
|
|
|
# Registry used for dynamic loading: maps each test's string name to the
# test class imported above. Entry order is kept exactly as declared.
TEST_REGISTRY = {
    "basic_conversation": BasicConversationTest,
    "content_validation": ContentValidationTest,
    "per_tool_deduplication": PerToolDeduplicationTest,
    "cross_tool_continuation": CrossToolContinuationTest,
    "cross_tool_comprehensive": CrossToolComprehensiveTest,
    "line_number_validation": LineNumberValidationTest,
    "logs_validation": LogsValidationTest,
    "redis_validation": RedisValidationTest,
    "model_thinking_config": TestModelThinkingConfig,
    "o3_model_selection": O3ModelSelectionTest,
    "ollama_custom_url": OllamaCustomUrlTest,
    "openrouter_fallback": OpenRouterFallbackTest,
    "openrouter_models": OpenRouterModelsTest,
    "token_allocation_validation": TokenAllocationValidationTest,
    "testgen_validation": TestGenValidationTest,
    "refactor_validation": RefactorValidationTest,
    "conversation_chain_validation": ConversationChainValidationTest,
    "vision_capability": VisionCapabilityTest,
    "xai_models": XAIModelsTest,
    "consensus_conversation": TestConsensusConversation,
    "consensus_stance": TestConsensusStance,
    "consensus_three_models": TestConsensusThreeModels,
    # Deliberately not registered - too expensive to run by default.
    # To enable it, add:  "o3_pro_expensive": O3ProExpensiveTest,
}
|
|
|
|
# Public names of this package: the base class, every imported test class
# (including O3ProExpensiveTest, which is not in TEST_REGISTRY), and the
# registry itself.
__all__ = [
    # Base class
    "BaseSimulatorTest",
    # Test classes
    "BasicConversationTest",
    "ContentValidationTest",
    "PerToolDeduplicationTest",
    "CrossToolContinuationTest",
    "CrossToolComprehensiveTest",
    "LineNumberValidationTest",
    "LogsValidationTest",
    "RedisValidationTest",
    "TestModelThinkingConfig",
    "O3ModelSelectionTest",
    "O3ProExpensiveTest",
    "OllamaCustomUrlTest",
    "OpenRouterFallbackTest",
    "OpenRouterModelsTest",
    "TokenAllocationValidationTest",
    "TestGenValidationTest",
    "RefactorValidationTest",
    "ConversationChainValidationTest",
    "VisionCapabilityTest",
    "XAIModelsTest",
    "TestConsensusConversation",
    "TestConsensusStance",
    "TestConsensusThreeModels",
    # Name-to-class registry
    "TEST_REGISTRY",
]