my-pal-mcp-server/simulator_tests/base_test.py
Beehive Innovations 95556ba9ea Add Consensus Tool for Multi-Model Perspective Gathering (#67)
* WIP
Refactor model-name resolution; it should be done once at the MCP call boundary,
passing the model context around instead.
The consensus tool gathers a consensus from multiple models, optionally assigning each a 'for' or 'against' stance to surface more nuanced responses.

* Deduplicate model resolution; model_context should be available before reaching deeper parts of the code.
Improved abstraction when building conversations.
Throw programmer errors early.

* Guardrails
Support for the `model:option` format at the MCP boundary, so future tools can use additional options if needed instead of handling this only for consensus.
The model name now supports an optional ":option" suffix for future use; a sketch of such a split is shown below.
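A minimal sketch of what parsing this format might look like (the helper name is hypothetical; the actual implementation may differ):

```python
from typing import Optional


def split_model_option(model_string: str) -> tuple[str, Optional[str]]:
    """Hypothetical helper: split 'model:option' into (model, option)."""
    # partition() leaves option empty when no ":" is present
    model, _, option = model_string.partition(":")
    return model.strip(), (option.strip() or None)


# split_model_option("o3:for") -> ("o3", "for")
# split_model_option("flash")  -> ("flash", None)
```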

* Simplified async flow

* Improved the request model to support natural language
Simplified async flow

* Fix consensus tool async/sync patterns to match codebase standards

CRITICAL FIXES:
- Converted _get_consensus_responses from async to sync (matches other tools)
- Converted store_conversation_turn from async to sync (add_turn is synchronous)
- Removed unnecessary asyncio imports and sleep calls
- Fixed ClosedResourceError in MCP protocol during long consensus operations

PATTERN ALIGNMENT:
- Consensus tool now follows the same sync patterns as all other tools
- Only execute() and prepare_prompt() are async (a base class requirement)
- All internal operations are synchronous, as in analyze, chat, debug, etc. (see the sketch below)
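
In sketch form, the intended shape is roughly the following (method names are taken from this commit; the class body and signatures are assumptions, not the exact codebase API):

```python
class ConsensusTool:  # base class omitted; illustrative sketch only
    async def execute(self, arguments: dict) -> list:
        # Only the base-class entry points are async
        prompt = await self.prepare_prompt(arguments)
        # Everything below runs synchronously, like the other tools
        responses = self._get_consensus_responses(prompt)
        self.store_conversation_turn(responses)
        return responses

    async def prepare_prompt(self, arguments: dict) -> str:
        ...  # assumed signature (base class requirement)

    def _get_consensus_responses(self, prompt: str) -> list:
        ...  # sync model calls, one per configured model

    def store_conversation_turn(self, responses: list) -> None:
        ...  # sync: the underlying add_turn is synchronous
```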

TESTING:
- MCP simulation test now passes: consensus_stance 
- Two-model consensus works correctly in ~35 seconds
- Unknown stance handling defaults to neutral with warnings
- All 9 unit tests pass (100% success rate)

The consensus tool's async patterns were anomalous in the codebase.
This fix aligns it with the established synchronous patterns used
by all other tools while maintaining full functionality.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed call order and added new test

* Cleanup dead comments
Docs for the new tool
Improved tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-17 10:53:17 +04:00


#!/usr/bin/env python3
"""
Base Test Class for Communication Simulator Tests

Provides common functionality and utilities for all simulator tests.
"""

import json
import logging
import os
import subprocess
from typing import Optional


class BaseSimulatorTest:
    """Base class for all communication simulator tests"""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.test_files = {}
        self.test_dir = None
        self.container_name = "zen-mcp-server"
        self.redis_container = "zen-mcp-redis"

        # Configure logging
        log_level = logging.DEBUG if verbose else logging.INFO
        logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
        self.logger = logging.getLogger(self.__class__.__name__)
    def setup_test_files(self):
        """Create test files for the simulation"""
        # Test Python file
        python_content = '''"""
Sample Python module for testing MCP conversation continuity
"""


def fibonacci(n):
    """Calculate fibonacci number recursively"""
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)


def factorial(n):
    """Calculate factorial iteratively"""
    result = 1
    for i in range(1, n + 1):
        result *= i
    return result


class Calculator:
    """Simple calculator class"""

    def __init__(self):
        self.history = []

    def add(self, a, b):
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def multiply(self, a, b):
        result = a * b
        self.history.append(f"{a} * {b} = {result}")
        return result
'''

        # Test configuration file
        config_content = """{
    "database": {
        "host": "localhost",
        "port": 5432,
        "name": "testdb",
        "ssl": true
    },
    "cache": {
        "redis_url": "redis://localhost:6379",
        "ttl": 3600
    },
    "logging": {
        "level": "INFO",
        "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    }
}"""

        # Create files in the current project directory
        current_dir = os.getcwd()
        self.test_dir = os.path.join(current_dir, "test_simulation_files")
        os.makedirs(self.test_dir, exist_ok=True)

        test_py = os.path.join(self.test_dir, "test_module.py")
        test_config = os.path.join(self.test_dir, "config.json")

        with open(test_py, "w") as f:
            f.write(python_content)
        with open(test_config, "w") as f:
            f.write(config_content)

        # Ensure absolute paths for MCP server compatibility
        self.test_files = {"python": os.path.abspath(test_py), "config": os.path.abspath(test_config)}
        self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}")
    def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
        """Call an MCP tool via Claude CLI (docker exec)"""
        try:
            # Prepare the MCP initialization and tool call sequence
            init_request = {
                "jsonrpc": "2.0",
                "id": 1,
                "method": "initialize",
                "params": {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {"tools": {}},
                    "clientInfo": {"name": "communication-simulator", "version": "1.0.0"},
                },
            }

            # Send initialized notification
            initialized_notification = {"jsonrpc": "2.0", "method": "notifications/initialized"}

            # Prepare the tool call request
            tool_request = {
                "jsonrpc": "2.0",
                "id": 2,
                "method": "tools/call",
                "params": {"name": tool_name, "arguments": params},
            }

            # Combine all messages, joined with newlines as MCP expects
            messages = [json.dumps(init_request), json.dumps(initialized_notification), json.dumps(tool_request)]
            input_data = "\n".join(messages) + "\n"
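
            # The wire format is newline-delimited JSON-RPC over stdio; input_data
            # looks roughly like this (illustrative, fields abbreviated):
            #   {"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": {...}}
            #   {"jsonrpc": "2.0", "method": "notifications/initialized"}
            #   {"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {...}}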
            # Simulate Claude CLI calling the MCP server via docker exec
            docker_cmd = ["docker", "exec", "-i", self.container_name, "python", "server.py"]
            self.logger.debug(f"Calling MCP tool {tool_name} with proper initialization")

            # Execute the command with proper handling for async responses.
            # For the consensus tool and other long-running tools, we need to
            # ensure the subprocess doesn't close prematurely.
            result = subprocess.run(
                docker_cmd,
                input=input_data,
                text=True,
                capture_output=True,
                timeout=3600,  # 1 hour timeout
                check=False,  # Don't raise on non-zero exit code
            )

            if result.returncode != 0:
                self.logger.error(f"Docker exec failed with return code {result.returncode}")
                self.logger.error(f"Stderr: {result.stderr}")
                # Still try to parse stdout as the response might have been written before the error
                self.logger.debug(f"Attempting to parse stdout despite error: {result.stdout[:500]}")

            # Parse the response - look for the tool call response
            response_data = self._parse_mcp_response(result.stdout, expected_id=2)
            if not response_data:
                return None, None

            # Extract continuation_id if present
            continuation_id = self._extract_continuation_id(response_data)
            return response_data, continuation_id

        except subprocess.TimeoutExpired:
            self.logger.error(f"MCP tool call timed out after 1 hour: {tool_name}")
            return None, None
        except Exception as e:
            self.logger.error(f"MCP tool call failed: {e}")
            return None, None
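
    # Typical usage from a subclass (illustrative; the tool name and params
    # below are hypothetical, not taken from an actual test):
    #   response, continuation_id = self.call_mcp_tool("chat", {"prompt": "Hello"})
    #   if continuation_id:
    #       ...  # pass it back in the next call to continue the conversation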
    def _parse_mcp_response(self, stdout: str, expected_id: int = 2) -> Optional[str]:
        """Parse MCP JSON-RPC response from stdout"""
        try:
            lines = stdout.strip().split("\n")

            for line in lines:
                if line.strip() and line.startswith("{"):
                    response = json.loads(line)

                    # Look for the tool call response with the expected ID
                    if response.get("id") == expected_id and "result" in response:
                        # Extract the actual content from the response
                        result = response["result"]

                        # Handle new response format with 'content' array
                        if isinstance(result, dict) and "content" in result:
                            content_array = result["content"]
                            if isinstance(content_array, list) and len(content_array) > 0:
                                return content_array[0].get("text", "")
                        # Handle legacy format
                        elif isinstance(result, list) and len(result) > 0:
                            return result[0].get("text", "")
                    elif response.get("id") == expected_id and "error" in response:
                        self.logger.error(f"MCP error: {response['error']}")
                        return None

            # If we get here, log all responses for debugging
            self.logger.warning(f"No valid tool call response found for ID {expected_id}")
            self.logger.warning(f"Full stdout: {stdout}")
            self.logger.warning(f"Total stdout lines: {len(lines)}")
            for i, line in enumerate(lines[:10]):  # Log first 10 lines
                self.logger.warning(f"Line {i}: {line[:100]}...")
            return None

        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse MCP response: {e}")
            self.logger.debug(f"Stdout that failed to parse: {stdout}")
            return None
    def _extract_continuation_id(self, response_text: str) -> Optional[str]:
        """Extract continuation_id from response metadata"""
        try:
            # Parse the response text as JSON to look for continuation metadata
            response_data = json.loads(response_text)

            # Look for continuation_id in various places
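            # Shapes handled below (field names as produced by this codebase):
            #   {"metadata": {"thread_id": "..."}}
            #   {"follow_up_request": {"continuation_id": "..."}}
            #   {"continuation_offer": {"continuation_id": "..."}}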
            if isinstance(response_data, dict):
                # Check metadata
                metadata = response_data.get("metadata", {})
                if "thread_id" in metadata:
                    return metadata["thread_id"]

                # Check follow_up_request
                follow_up = response_data.get("follow_up_request", {})
                if follow_up and "continuation_id" in follow_up:
                    return follow_up["continuation_id"]

                # Check continuation_offer
                continuation_offer = response_data.get("continuation_offer", {})
                if continuation_offer and "continuation_id" in continuation_offer:
                    return continuation_offer["continuation_id"]

            self.logger.debug(f"No continuation_id found in response: {response_data}")
            return None

        except json.JSONDecodeError as e:
            self.logger.debug(f"Failed to parse response for continuation_id: {e}")
            return None
    def run_command(self, cmd: list[str], check: bool = True, capture_output: bool = False, **kwargs):
        """Run a shell command with logging"""
        if self.verbose:
            self.logger.debug(f"Running: {' '.join(cmd)}")
        return subprocess.run(cmd, check=check, capture_output=capture_output, **kwargs)
    def create_additional_test_file(self, filename: str, content: str) -> str:
        """Create an additional test file for mixed scenario testing"""
        if not hasattr(self, "test_dir") or not self.test_dir:
            raise RuntimeError("Test directory not initialized. Call setup_test_files() first.")

        file_path = os.path.join(self.test_dir, filename)
        with open(file_path, "w") as f:
            f.write(content)

        # Return absolute path for MCP server compatibility
        return os.path.abspath(file_path)
    def cleanup_test_files(self):
        """Clean up test files"""
        if hasattr(self, "test_dir") and self.test_dir and os.path.exists(self.test_dir):
            import shutil

            shutil.rmtree(self.test_dir)
            self.logger.debug(f"Removed test files directory: {self.test_dir}")
    def run_test(self) -> bool:
        """Run the test - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement run_test()")

    @property
    def test_name(self) -> str:
        """Get the test name - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement test_name property")

    @property
    def test_description(self) -> str:
        """Get the test description - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement test_description property")