#!/usr/bin/env python3 """ Consensus Conversation Continuation Test Tests that the consensus tool properly handles conversation continuation and builds conversation context correctly when using continuation_id. """ import json import subprocess from .base_test import BaseSimulatorTest class TestConsensusConversation(BaseSimulatorTest): """Test consensus tool conversation continuation functionality""" @property def test_name(self) -> str: return "consensus_conversation" @property def test_description(self) -> str: return "Test consensus tool conversation building and continuation" def get_docker_logs(self): """Get Docker container logs""" try: result = subprocess.run( ["docker", "logs", "--tail", "100", self.container_name], capture_output=True, text=True, timeout=30 ) if result.returncode == 0: return result.stdout.split("\n") else: self.logger.warning(f"Failed to get Docker logs: {result.stderr}") return [] except Exception as e: self.logger.warning(f"Exception getting Docker logs: {e}") return [] def run_test(self) -> bool: """Test consensus conversation continuation""" try: self.logger.info("Testing consensus tool conversation continuation") # Setup test files for context self.setup_test_files() # Phase 1: Start conversation with chat tool (which properly creates continuation_id) self.logger.info("Phase 1: Starting conversation with chat tool") initial_response, continuation_id = self.call_mcp_tool( "chat", { "prompt": "Please use low thinking mode. I'm working on a web application and need advice on authentication. Can you look at this code?", "files": [self.test_files["python"]], "model": "local-llama", }, ) # Validate initial response if not initial_response: self.logger.error("Failed to get initial chat response") return False if not continuation_id: self.logger.error("Failed to get continuation_id from initial chat") return False self.logger.info(f"Initial chat response preview: {initial_response[:200]}...") self.logger.info(f"Got continuation_id: {continuation_id}") # Phase 2: Use consensus with continuation_id to test conversation building self.logger.info("Phase 2: Using consensus with continuation_id to test conversation building") consensus_response, _ = self.call_mcp_tool( "consensus", { "prompt": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?", "models": [ { "model": "local-llama", "stance": "for", "stance_prompt": "Focus on OAuth2 benefits: security, scalability, and industry standards.", }, { "model": "local-llama", "stance": "against", "stance_prompt": "Focus on OAuth2 complexity: implementation challenges and simpler alternatives.", }, ], "continuation_id": continuation_id, "model": "local-llama", }, ) # Validate consensus response if not consensus_response: self.logger.error("Failed to get consensus response with continuation_id") return False self.logger.info(f"Consensus response preview: {consensus_response[:300]}...") # Log the full response for debugging if it's not JSON if not consensus_response.startswith("{"): self.logger.error(f"Consensus response is not JSON. Full response: {consensus_response}") return False # Parse consensus response try: consensus_data = json.loads(consensus_response) except json.JSONDecodeError: self.logger.error(f"Failed to parse consensus response as JSON. Full response: {consensus_response}") return False if consensus_data.get("status") != "consensus_success": self.logger.error(f"Consensus failed with status: {consensus_data.get('status')}") if "error" in consensus_data: self.logger.error(f"Error: {consensus_data['error']}") return False # Phase 3: Check server logs for conversation building self.logger.info("Phase 3: Checking server logs for conversation building") # Check for conversation-related log entries logs = self.get_docker_logs() if not logs: self.logger.warning("Could not retrieve Docker logs for verification") else: # Look for conversation building indicators conversation_logs = [ line for line in logs if any( keyword in line for keyword in [ "CONVERSATION HISTORY", "continuation_id", "build_conversation_history", "ThreadContext", f"thread:{continuation_id}", ] ) ] if conversation_logs: self.logger.info(f"Found {len(conversation_logs)} conversation-related log entries") # Show a few examples (truncated) for i, log in enumerate(conversation_logs[:3]): self.logger.info(f" Conversation log {i+1}: {log[:100]}...") else: self.logger.warning( "No conversation-related logs found (may indicate conversation not properly built)" ) # Check for any ERROR entries related to consensus error_logs = [ line for line in logs if "ERROR" in line and any(keyword in line for keyword in ["consensus", "conversation", continuation_id]) ] if error_logs: self.logger.error(f"Found {len(error_logs)} error logs related to consensus conversation:") for error in error_logs: self.logger.error(f" ERROR: {error}") return False # Phase 4: Verify response structure self.logger.info("Phase 4: Verifying consensus response structure") # Check that consensus has proper models_used models_used = consensus_data.get("models_used", []) if not models_used: self.logger.error("Consensus response missing models_used") return False # Check that we have responses responses = consensus_data.get("responses", []) if not responses: self.logger.error("Consensus response missing responses") return False # Verify at least one successful response successful_responses = [r for r in responses if r.get("status") == "success"] if not successful_responses: self.logger.error("No successful responses in consensus") return False self.logger.info(f"Consensus used models: {models_used}") self.logger.info(f"Consensus had {len(successful_responses)} successful responses") # Phase 5: Cross-tool continuation test self.logger.info("Phase 5: Testing cross-tool continuation from consensus") # Try to continue the conversation with a different tool chat_response, _ = self.call_mcp_tool( "chat", { "prompt": "Based on our consensus discussion about authentication, can you summarize the key points?", "continuation_id": continuation_id, "model": "local-llama", }, ) if not chat_response: self.logger.warning("Cross-tool continuation from consensus failed") # Don't fail the test for this - it's a bonus check else: self.logger.info("✓ Cross-tool continuation from consensus working") self.logger.info(f"Chat continuation preview: {chat_response[:200]}...") self.logger.info("✓ Consensus conversation continuation test completed successfully") return True except Exception as e: self.logger.error(f"Consensus conversation test failed with exception: {str(e)}") import traceback self.logger.error(f"Traceback: {traceback.format_exc()}") return False finally: self.cleanup_test_files()