Improved consensus to treat a step properly as both a request + response, and initial step includes Claude's assessment.

Improved prompt to not request for code when it's a general business decision
2025-06-22 13:37:32 +04:00
parent 18f6f16ac6
commit 521c6c0e61
6 changed files with 79 additions and 118 deletions
--- a/simulator_tests/test_consensus_conversation.py
+++ b/simulator_tests/test_consensus_conversation.py
@@ -78,7 +78,11 @@ class TestConsensusConversation(ConversationBaseTest):
            consensus_response, _ = self.call_mcp_tool(
                "consensus",
                {
-                    "prompt": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
+                    "step": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
+                    "step_number": 1,
+                    "total_steps": 2,
+                    "next_step_required": True,
+                    "findings": "Initial analysis needed on OAuth2 vs session-based authentication approaches for our web application",
                    "models": [
                        {
                            "model": "flash",
@@ -115,8 +119,10 @@ class TestConsensusConversation(ConversationBaseTest):
                self.logger.error(f"Failed to parse consensus response as JSON. Full response: {consensus_response}")
                return False

-            if consensus_data.get("status") != "consensus_success":
-                self.logger.error(f"Consensus failed with status: {consensus_data.get('status')}")
+            # Check for step 1 status (Claude analysis + first model consultation)
+            expected_status = "analysis_and_first_model_consulted"
+            if consensus_data.get("status") != expected_status:
+                self.logger.error(f"Consensus step 1 failed with status: {consensus_data.get('status')}, expected: {expected_status}")
                if "error" in consensus_data:
                    self.logger.error(f"Error: {consensus_data['error']}")
                return False
@@ -172,26 +178,29 @@ class TestConsensusConversation(ConversationBaseTest):
            # Phase 4: Verify response structure
            self.logger.info("Phase 4: Verifying consensus response structure")

-            # Check that consensus has proper models_used
-            models_used = consensus_data.get("models_used", [])
-            if not models_used:
-                self.logger.error("Consensus response missing models_used")
+            # Check that we have model response from step 1 
+            model_response = consensus_data.get("model_response")
+            if not model_response:
+                self.logger.error("Consensus step 1 response missing model_response")
                return False

-            # Check that we have responses
-            responses = consensus_data.get("responses", [])
-            if not responses:
-                self.logger.error("Consensus response missing responses")
+            # Check that model response has expected structure
+            if not model_response.get("model") or not model_response.get("verdict"):
+                self.logger.error("Model response missing required fields (model or verdict)")
                return False

-            # Verify at least one successful response
-            successful_responses = [r for r in responses if r.get("status") == "success"]
-            if not successful_responses:
-                self.logger.error("No successful responses in consensus")
+            # Check step information
+            if consensus_data.get("step_number") != 1:
+                self.logger.error(f"Expected step_number 1, got: {consensus_data.get('step_number')}")
                return False

-            self.logger.info(f"Consensus used models: {models_used}")
-            self.logger.info(f"Consensus had {len(successful_responses)} successful responses")
+            if not consensus_data.get("next_step_required"):
+                self.logger.error("Expected next_step_required=True for step 1")
+                return False
+
+            self.logger.info(f"Consensus step 1 consulted model: {model_response.get('model')}")
+            self.logger.info(f"Model stance: {model_response.get('stance', 'neutral')}")
+            self.logger.info(f"Response status: {model_response.get('status', 'unknown')}")

            # Phase 5: Cross-tool continuation test
            self.logger.info("Phase 5: Testing cross-tool continuation from consensus")