diff --git a/config.py b/config.py
index da5ccc9..f51acc4 100644
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.5.6"
+__version__ = "5.5.7"
 # Last update date in ISO format
 __updated__ = "2025-06-22"
 # Primary maintainer
diff --git a/simulator_tests/test_consensus_conversation.py b/simulator_tests/test_consensus_conversation.py
index a078342..44eba24 100644
--- a/simulator_tests/test_consensus_conversation.py
+++ b/simulator_tests/test_consensus_conversation.py
@@ -78,7 +78,11 @@ class TestConsensusConversation(ConversationBaseTest):
             consensus_response, _ = self.call_mcp_tool(
                 "consensus",
                 {
-                    "prompt": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
+                    "step": "Based on our previous discussion about authentication, I need expert consensus: Should we implement OAuth2 or stick with simple session-based auth?",
+                    "step_number": 1,
+                    "total_steps": 2,
+                    "next_step_required": True,
+                    "findings": "Initial analysis needed on OAuth2 vs session-based authentication approaches for our web application",
                     "models": [
                         {
                             "model": "flash",
@@ -115,8 +119,10 @@ class TestConsensusConversation(ConversationBaseTest):
                 self.logger.error(f"Failed to parse consensus response as JSON. Full response: {consensus_response}")
                 return False

-            if consensus_data.get("status") != "consensus_success":
-                self.logger.error(f"Consensus failed with status: {consensus_data.get('status')}")
+            # Check for step 1 status (Claude analysis + first model consultation)
+            expected_status = "analysis_and_first_model_consulted"
+            if consensus_data.get("status") != expected_status:
+                self.logger.error(f"Consensus step 1 failed with status: {consensus_data.get('status')}, expected: {expected_status}")
                 if "error" in consensus_data:
                     self.logger.error(f"Error: {consensus_data['error']}")
                 return False
@@ -172,26 +178,29 @@ class TestConsensusConversation(ConversationBaseTest):
             # Phase 4: Verify response structure
             self.logger.info("Phase 4: Verifying consensus response structure")

-            # Check that consensus has proper models_used
-            models_used = consensus_data.get("models_used", [])
-            if not models_used:
-                self.logger.error("Consensus response missing models_used")
+            # Check that we have model response from step 1
+            model_response = consensus_data.get("model_response")
+            if not model_response:
+                self.logger.error("Consensus step 1 response missing model_response")
                 return False

-            # Check that we have responses
-            responses = consensus_data.get("responses", [])
-            if not responses:
-                self.logger.error("Consensus response missing responses")
+            # Check that model response has expected structure
+            if not model_response.get("model") or not model_response.get("verdict"):
+                self.logger.error("Model response missing required fields (model or verdict)")
                 return False

-            # Verify at least one successful response
-            successful_responses = [r for r in responses if r.get("status") == "success"]
-            if not successful_responses:
-                self.logger.error("No successful responses in consensus")
+            # Check step information
+            if consensus_data.get("step_number") != 1:
+                self.logger.error(f"Expected step_number 1, got: {consensus_data.get('step_number')}")
                 return False

-            self.logger.info(f"Consensus used models: {models_used}")
-            self.logger.info(f"Consensus had {len(successful_responses)} successful responses")
+            if not consensus_data.get("next_step_required"):
+                self.logger.error("Expected next_step_required=True for step 1")
+                return False
+
+            self.logger.info(f"Consensus step 1 consulted model: {model_response.get('model')}")
+            self.logger.info(f"Model stance: {model_response.get('stance', 'neutral')}")
+            self.logger.info(f"Response status: {model_response.get('status', 'unknown')}")

             # Phase 5: Cross-tool continuation test
             self.logger.info("Phase 5: Testing cross-tool continuation from consensus")
diff --git a/simulator_tests/test_consensus_three_models.py b/simulator_tests/test_consensus_three_models.py
index 3cd4773..67b24ed 100644
--- a/simulator_tests/test_consensus_three_models.py
+++ b/simulator_tests/test_consensus_three_models.py
@@ -23,11 +23,15 @@ class TestConsensusThreeModels(BaseSimulatorTest):
         try:
             self.logger.info("Testing consensus tool with three models: flash:against, flash:for, local-llama:neutral")

-            # Send request with three ModelConfig objects
+            # Send request with three ModelConfig objects using new workflow parameters
             response, continuation_id = self.call_mcp_tool(
                 "consensus",
                 {
-                    "prompt": "Is a sync manager class a good idea for my CoolTodos app?",
+                    "step": "Is a sync manager class a good idea for my CoolTodos app?",
+                    "step_number": 1,
+                    "total_steps": 3,  # 3 models = 3 steps
+                    "next_step_required": True,
+                    "findings": "Initial analysis needed on sync manager class architecture decision for CoolTodos app",
                     "models": [
                         {
                             "model": "flash",
@@ -45,8 +49,7 @@ class TestConsensusThreeModels(BaseSimulatorTest):
                             "stance_prompt": "You are a pragmatic software engineer. Provide a balanced analysis considering both the benefits and drawbacks. Focus on the specific context of a CoolTodos app and what factors would determine if this is the right choice.",
                         },
                     ],
-                    "model": "flash",  # Default model for Claude's synthesis
-                    "focus_areas": ["architecture", "maintainability", "complexity", "scalability"],
+                    "model": "flash",  # Default model for Claude's execution
                 },
             )

@@ -69,8 +72,10 @@ class TestConsensusThreeModels(BaseSimulatorTest):
                 self.logger.error("Missing 'status' field in three-model consensus response")
                 return False

-            if consensus_data["status"] != "consensus_success":
-                self.logger.error(f"Three-model consensus failed with status: {consensus_data['status']}")
+            # Check for step 1 status (Claude analysis + first model consultation)
+            expected_status = "analysis_and_first_model_consulted"
+            if consensus_data["status"] != expected_status:
+                self.logger.error(f"Three-model consensus step 1 failed with status: {consensus_data['status']}, expected: {expected_status}")

                 # Log additional error details for debugging
                 if "error" in consensus_data:
@@ -84,67 +89,52 @@

                 return False

-            # Check that models were used correctly
-            if "models_used" not in consensus_data:
-                self.logger.error("Missing 'models_used' field in three-model consensus response")
+            # Check that we have model response from step 1
+            model_response = consensus_data.get("model_response")
+            if not model_response:
+                self.logger.error("Three-model consensus step 1 response missing model_response")
                 return False

-            models_used = consensus_data["models_used"]
-            self.logger.info(f"Models used in three-model test: {models_used}")
-
-            # Validate we got the expected models (allowing for some to fail)
-            expected_models = ["flash:against", "flash:for", "local-llama"]
-            successful_models = [m for m in expected_models if m in models_used]
-
-            if len(successful_models) == 0:
-                self.logger.error("No models succeeded in three-model consensus test")
+            # Check that model response has expected structure
+            if not model_response.get("model") or not model_response.get("verdict"):
+                self.logger.error("Model response missing required fields (model or verdict)")
                 return False

-            self.logger.info(f"Successful models in three-model test: {successful_models}")
-
-            # Validate responses structure
-            if "responses" not in consensus_data:
-                self.logger.error("Missing 'responses' field in three-model consensus response")
+            # Check step information
+            if consensus_data.get("step_number") != 1:
+                self.logger.error(f"Expected step_number 1, got: {consensus_data.get('step_number')}")
                 return False

-            responses = consensus_data["responses"]
-            if len(responses) == 0:
-                self.logger.error("No responses received in three-model consensus test")
+            if not consensus_data.get("next_step_required"):
+                self.logger.error("Expected next_step_required=True for step 1")
                 return False

-            self.logger.info(f"Received {len(responses)} responses in three-model test")
+            self.logger.info(f"Consensus step 1 consulted model: {model_response.get('model')}")
+            self.logger.info(f"Model stance: {model_response.get('stance', 'neutral')}")
+            self.logger.info(f"Response status: {model_response.get('status', 'unknown')}")

-            # Count successful responses by stance
-            stance_counts = {"for": 0, "against": 0, "neutral": 0}
-            for resp in responses:
-                if resp.get("status") == "success":
-                    stance = resp.get("stance", "neutral")
-                    stance_counts[stance] = stance_counts.get(stance, 0) + 1
-
-            self.logger.info(f"Stance distribution: {stance_counts}")
-
-            # Verify we have at least one successful response
-            total_successful = sum(stance_counts.values())
-            if total_successful == 0:
-                self.logger.error("No successful responses in three-model consensus test")
+            # Check metadata contains model name
+            metadata = consensus_data.get("metadata", {})
+            if not metadata.get("model_name"):
+                self.logger.error("Missing model_name in metadata")
                 return False

-            # Check for sequential processing indication (>2 models should use sequential)
-            if len(consensus_data["models_used"]) > 2:
-                self.logger.info("✓ Sequential processing was correctly used for >2 models")
-            else:
-                self.logger.info("✓ Concurrent processing was used (≤2 models)")
+            self.logger.info(f"Model name in metadata: {metadata.get('model_name')}")

-            # Verify synthesis guidance is present
-            if "next_steps" not in consensus_data:
-                self.logger.error("Missing 'next_steps' field in three-model consensus response")
+            # Verify we have analysis from Claude
+            claude_analysis = consensus_data.get("claude_analysis")
+            if not claude_analysis:
+                self.logger.error("Missing Claude's analysis in step 1")
                 return False

+            analysis_text = claude_analysis.get("initial_analysis", "")
+            self.logger.info(f"Claude analysis length: {len(analysis_text)} characters")
+
             self.logger.info("✓ Three-model consensus tool test completed successfully")
-            self.logger.info(f"✓ Total successful responses: {total_successful}")
-            self.logger.info(
-                f"✓ Stance diversity achieved: {len([s for s in stance_counts.values() if s > 0])} different stances"
-            )
+            self.logger.info(f"✓ Step 1 completed with model: {model_response.get('model')}")
+            self.logger.info(f"✓ Analysis provided: {len(analysis_text)} characters")
+            self.logger.info(f"✓ Model metadata properly included: {metadata.get('model_name')}")
+            self.logger.info("✓ Ready for step 2 continuation")

             return True

diff --git a/test_simulation_files/config.json b/test_simulation_files/config.json
deleted file mode 100644
index c066b27..0000000
--- a/test_simulation_files/config.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-  "database": {
-    "host": "localhost",
-    "port": 5432,
-    "name": "testdb",
-    "ssl": true
-  },
-  "cache": {
-    "redis_url": "redis://localhost:6379",
-    "ttl": 3600
-  },
-  "logging": {
-    "level": "INFO",
-    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-  }
-}
\ No newline at end of file
diff --git a/test_simulation_files/test_module.py b/test_simulation_files/test_module.py
deleted file mode 100644
index 5defb99..0000000
--- a/test_simulation_files/test_module.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-Sample Python module for testing MCP conversation continuity
-"""
-
-def fibonacci(n):
-    """Calculate fibonacci number recursively"""
-    if n <= 1:
-        return n
-    return fibonacci(n-1) + fibonacci(n-2)
-
-def factorial(n):
-    """Calculate factorial iteratively"""
-    result = 1
-    for i in range(1, n + 1):
-        result *= i
-    return result
-
-class Calculator:
-    """Simple calculator class"""
-
-    def __init__(self):
-        self.history = []
-
-    def add(self, a, b):
-        result = a + b
-        self.history.append(f"{a} + {b} = {result}")
-        return result
-
-    def multiply(self, a, b):
-        result = a * b
-        self.history.append(f"{a} * {b} = {result}")
-        return result
diff --git a/tools/consensus.py b/tools/consensus.py
index 2d9146e..874c300 100644
--- a/tools/consensus.py
+++ b/tools/consensus.py
@@ -502,6 +502,16 @@ of the evidence, even when it strongly points in one direction.""",
             # Add accumulated responses for tracking
             response_data["accumulated_responses"] = self.accumulated_responses

+            # Add metadata (since we're bypassing the base class metadata addition)
+            model_name = self.get_request_model_name(request)
+            provider = self.get_model_provider(model_name)
+            response_data["metadata"] = {
+                "tool_name": self.get_name(),
+                "model_name": model_name,
+                "model_used": model_name,
+                "provider_used": provider.get_provider_type().value,
+            }
+
             return [TextContent(type="text", text=json.dumps(response_data, indent=2))]

         # Otherwise, use standard workflow execution