Improved consensus to treat a step properly as both a request + response, and initial step includes Claude's assessment.
Improved prompt to not request for code when it's a general business decision
This commit is contained in:
@@ -78,7 +78,11 @@ class TestConsensusConversation(ConversationBaseTest):
|
||||
consensus_response, _ = self.call_mcp_tool(
|
||||
"consensus",
|
||||
{
|
||||
"prompt": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
|
||||
"step": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
|
||||
"step_number": 1,
|
||||
"total_steps": 2,
|
||||
"next_step_required": True,
|
||||
"findings": "Initial analysis needed on OAuth2 vs session-based authentication approaches for our web application",
|
||||
"models": [
|
||||
{
|
||||
"model": "flash",
|
||||
@@ -115,8 +119,10 @@ class TestConsensusConversation(ConversationBaseTest):
|
||||
self.logger.error(f"Failed to parse consensus response as JSON. Full response: {consensus_response}")
|
||||
return False
|
||||
|
||||
if consensus_data.get("status") != "consensus_success":
|
||||
self.logger.error(f"Consensus failed with status: {consensus_data.get('status')}")
|
||||
# Check for step 1 status (Claude analysis + first model consultation)
|
||||
expected_status = "analysis_and_first_model_consulted"
|
||||
if consensus_data.get("status") != expected_status:
|
||||
self.logger.error(f"Consensus step 1 failed with status: {consensus_data.get('status')}, expected: {expected_status}")
|
||||
if "error" in consensus_data:
|
||||
self.logger.error(f"Error: {consensus_data['error']}")
|
||||
return False
|
||||
@@ -172,26 +178,29 @@ class TestConsensusConversation(ConversationBaseTest):
|
||||
# Phase 4: Verify response structure
|
||||
self.logger.info("Phase 4: Verifying consensus response structure")
|
||||
|
||||
# Check that consensus has proper models_used
|
||||
models_used = consensus_data.get("models_used", [])
|
||||
if not models_used:
|
||||
self.logger.error("Consensus response missing models_used")
|
||||
# Check that we have model response from step 1
|
||||
model_response = consensus_data.get("model_response")
|
||||
if not model_response:
|
||||
self.logger.error("Consensus step 1 response missing model_response")
|
||||
return False
|
||||
|
||||
# Check that we have responses
|
||||
responses = consensus_data.get("responses", [])
|
||||
if not responses:
|
||||
self.logger.error("Consensus response missing responses")
|
||||
# Check that model response has expected structure
|
||||
if not model_response.get("model") or not model_response.get("verdict"):
|
||||
self.logger.error("Model response missing required fields (model or verdict)")
|
||||
return False
|
||||
|
||||
# Verify at least one successful response
|
||||
successful_responses = [r for r in responses if r.get("status") == "success"]
|
||||
if not successful_responses:
|
||||
self.logger.error("No successful responses in consensus")
|
||||
# Check step information
|
||||
if consensus_data.get("step_number") != 1:
|
||||
self.logger.error(f"Expected step_number 1, got: {consensus_data.get('step_number')}")
|
||||
return False
|
||||
|
||||
self.logger.info(f"Consensus used models: {models_used}")
|
||||
self.logger.info(f"Consensus had {len(successful_responses)} successful responses")
|
||||
if not consensus_data.get("next_step_required"):
|
||||
self.logger.error("Expected next_step_required=True for step 1")
|
||||
return False
|
||||
|
||||
self.logger.info(f"Consensus step 1 consulted model: {model_response.get('model')}")
|
||||
self.logger.info(f"Model stance: {model_response.get('stance', 'neutral')}")
|
||||
self.logger.info(f"Response status: {model_response.get('status', 'unknown')}")
|
||||
|
||||
# Phase 5: Cross-tool continuation test
|
||||
self.logger.info("Phase 5: Testing cross-tool continuation from consensus")
|
||||
|
||||
Reference in New Issue
Block a user