Improvements to consensus

This commit renames the planner workflow statuses (planner_complete becomes planning_complete, pause_for_planner becomes pause_for_planning), makes the consensus tool store the original step-1 proposal separately so that every consulted model receives that exact text rather than later internal notes, drops redundant f-string prefixes from the tracer status mapping, and bumps the version to 5.8.5.
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.8.4"
+__version__ = "5.8.5"
 # Last update date in ISO format
 __updated__ = "2025-08-08"
 # Primary maintainer
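The version bump is a PATCH-level change under the semantic versioning scheme noted in the comments (MAJOR.MINOR.PATCH). As a hedged illustration only (the importable module name `config` and the `version_banner` helper are assumptions, not part of this commit), server code could surface these fields like so:

```python
# Hypothetical usage sketch; assumes the file above is importable as `config`.
from config import __updated__, __version__


def version_banner() -> str:
    # Produces e.g. "v5.8.5 (updated 2025-08-08)" for server responses.
    return f"v{__version__} (updated {__updated__})"


print(version_banner())
```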
@@ -100,7 +100,7 @@ class TestAutoModelPlannerFix:
         import json

         response_data = json.loads(result[0].text)
-        assert response_data["status"] == "planner_complete"
+        assert response_data["status"] == "planning_complete"
         assert response_data["step_number"] == 1

     @patch("config.DEFAULT_MODEL", "auto")
@@ -172,7 +172,7 @@ class TestAutoModelPlannerFix:
         import json

         response1 = json.loads(result1[0].text)
-        assert response1["status"] == "pause_for_planner"
+        assert response1["status"] == "pause_for_planning"
         assert response1["next_step_required"] is True
         assert "continuation_id" in response1

@@ -190,7 +190,7 @@ class TestAutoModelPlannerFix:
         assert len(result2) > 0

         response2 = json.loads(result2[0].text)
-        assert response2["status"] == "pause_for_planner"
+        assert response2["status"] == "pause_for_planning"
         assert response2["step_number"] == 2

     def test_other_tools_still_require_models(self):
@@ -226,7 +226,7 @@ class TestPlannerTool:
         parsed_response = json.loads(response_text)

         # Check final step structure
-        assert parsed_response["status"] == "planner_complete"
+        assert parsed_response["status"] == "planning_complete"
         assert parsed_response["step_number"] == 10
         assert parsed_response["planning_complete"] is True
         assert "plan_summary" in parsed_response
@@ -329,7 +329,7 @@ class TestPlannerTool:
         # Total steps should be adjusted to match current step
         assert parsed_response["total_steps"] == 8
         assert parsed_response["step_number"] == 8
-        assert parsed_response["status"] == "pause_for_planner"
+        assert parsed_response["status"] == "pause_for_planning"

     @pytest.mark.asyncio
     async def test_execute_error_handling(self):
@@ -457,6 +457,6 @@ class TestPlannerToolIntegration:
         assert parsed_response["total_steps"] == 3
         assert parsed_response["continuation_id"] == "test-simple-uuid"
         # For simple plans (< 5 steps), expect normal flow without deep thinking pause
-        assert parsed_response["status"] == "pause_for_planner"
+        assert parsed_response["status"] == "pause_for_planning"
         assert "thinking_required" not in parsed_response
         assert "Continue with step 2" in parsed_response["next_steps"]
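The test updates above all expect workflow-neutral status names: `planning_complete` instead of `planner_complete`, and `pause_for_planning` instead of `pause_for_planner`. The planner tool's implementation is not shown in this commit, so the following is only a sketch of how such a rename is typically produced, assuming a status-mapping approach like the one the TracerTool hunk at the end of this diff already uses (all names here are illustrative):

```python
# Illustrative sketch only; mirrors the TracerTool status_mapping pattern.
tool_name = "planner"
status_mapping = {
    f"{tool_name}_in_progress": "planning_in_progress",
    f"pause_for_{tool_name}": "pause_for_planning",  # expected by the updated tests
    f"{tool_name}_complete": "planning_complete",    # expected by the updated tests
}


def map_status(raw_status: str) -> str:
    # Fall back to the raw status when no friendlier name is registered.
    return status_mapping.get(raw_status, raw_status)


assert map_status("pause_for_planner") == "pause_for_planning"
assert map_status("planner_complete") == "planning_complete"
```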
@@ -37,11 +37,12 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for consensus workflow
 CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = {
     "step": (
-        "Describe your current consensus analysis step. In step 1, provide your own neutral, balanced analysis "
-        "of the proposal/idea/plan after thinking carefully about all aspects. Consider technical feasibility, "
-        "user value, implementation complexity, and alternatives. In subsequent steps (2+), you will receive "
-        "individual model responses to synthesize. CRITICAL: Be thorough and balanced in your initial assessment, "
-        "considering both benefits and risks, opportunities and challenges."
+        "In step 1: Provide the EXACT question or proposal that ALL models will evaluate. This should be phrased as a clear "
+        "question or problem statement, NOT as 'I will analyze...' or 'Let me examine...'. For example: 'Should we build a "
+        "search component in SwiftUI for use in an AppKit app?' or 'Evaluate the proposal to migrate our database from MySQL "
+        "to PostgreSQL'. This exact text will be sent to all models for their independent evaluation. "
+        "In subsequent steps (2+): This field is for internal tracking only - you can provide notes about the model response "
+        "you just received. This will NOT be sent to other models (they all receive the original proposal from step 1)."
     ),
     "step_number": (
         "The index of the current step in the consensus workflow, beginning at 1. Step 1 is your analysis, "
@@ -54,8 +55,11 @@ CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = {
     ),
     "next_step_required": ("Set to true if more models need to be consulted. False when ready for final synthesis."),
     "findings": (
-        "In step 1, provide your comprehensive analysis of the proposal. In steps 2+, summarize the key points "
-        "from the model response received, noting agreements and disagreements with previous analyses."
+        "In step 1: Provide YOUR OWN comprehensive analysis of the proposal/question. This is where you share your "
+        "independent evaluation, considering technical feasibility, risks, benefits, and alternatives. This analysis "
+        "is NOT sent to other models - it's recorded for the final synthesis. "
+        "In steps 2+: Summarize the key points from the model response received, noting agreements and disagreements "
+        "with previous analyses."
     ),
     "relevant_files": (
         "Files that are relevant to the consensus analysis. Include files that help understand the proposal, "
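Taken together, the reworded descriptions draw a hard line between what is broadcast to the consulted models (the exact step-1 proposal) and what stays internal (step-2+ notes and all `findings`). The sketch below shows arguments that would follow these descriptions; it is not a complete or authoritative schema, and anything beyond the fields documented above and the `model`/`stance` keys referenced elsewhere in this diff is an assumption:

```python
# Partial, illustrative argument sets consistent with the new descriptions.
step1_args = {
    "step": "Evaluate the proposal to migrate our database from MySQL to PostgreSQL",  # sent verbatim to every model
    "step_number": 1,
    "next_step_required": True,
    "findings": "My own analysis: feasible, but the migration window is the main risk.",  # kept for synthesis, never broadcast
    "models": [{"model": "model-a", "stance": "for"}, {"model": "model-b", "stance": "against"}],
}

step2_args = {
    "step": "Notes about the response just received from model-a.",  # internal tracking only
    "step_number": 2,
    "next_step_required": True,
    "findings": "model-a agrees on feasibility but questions the timeline.",
}
```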
@@ -161,6 +165,7 @@ class ConsensusTool(WorkflowTool):
     def __init__(self):
         super().__init__()
         self.initial_prompt: str | None = None
+        self.original_proposal: str | None = None  # Store the original proposal separately
         self.models_to_consult: list[dict] = []
         self.accumulated_responses: list[dict] = []
         self._current_arguments: dict[str, Any] = {}
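Keeping both `original_proposal` and `initial_prompt` means two attributes now track the same text and are assigned in tandem at every write site below. A hypothetical alternative (not what this commit does) would be to make `initial_prompt` a property alias so the two names cannot drift apart:

```python
# Hypothetical alternative design, not part of this commit.
class ConsensusState:
    def __init__(self) -> None:
        self.original_proposal: str | None = None

    @property
    def initial_prompt(self) -> str | None:
        # Backward-compatible read view over the single source of truth.
        return self.original_proposal

    @initial_prompt.setter
    def initial_prompt(self, value: str | None) -> None:
        # Legacy writers keep working but land in original_proposal.
        self.original_proposal = value


state = ConsensusState()
state.initial_prompt = "Should we adopt PostgreSQL?"
assert state.original_proposal == "Should we adopt PostgreSQL?"
```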
@@ -394,7 +399,7 @@ of the evidence, even when it strongly points in one direction.""",

             # Prepare final synthesis data
             response_data["complete_consensus"] = {
-                "initial_prompt": self.initial_prompt,
+                "initial_prompt": self.original_proposal if self.original_proposal else self.initial_prompt,
                 "models_consulted": [m["model"] + ":" + m.get("stance", "neutral") for m in self.accumulated_responses],
                 "total_responses": len(self.accumulated_responses),
                 "consensus_confidence": "high",  # Consensus complete
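The fallback keeps older sessions working when only `initial_prompt` was ever set. For optional strings the conditional expression is interchangeable with Python's `or`, since both treat `None` and the empty string as unset; a tiny check of that equivalence:

```python
original_proposal = None
initial_prompt = "Evaluate the proposal to migrate to PostgreSQL"

# Both forms fall back whenever original_proposal is None or "".
assert (original_proposal if original_proposal else initial_prompt) == (original_proposal or initial_prompt)
```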
@@ -445,7 +450,9 @@ of the evidence, even when it strongly points in one direction.""",

         # On first step, store the models to consult
         if request.step_number == 1:
-            self.initial_prompt = request.step
+            # Store the original proposal from step 1 - this is what all models should see
+            self.original_proposal = request.step
+            self.initial_prompt = request.step  # Keep for backward compatibility
             self.models_to_consult = request.models or []
             self.accumulated_responses = []
             # Set total steps: len(models) (each step includes consultation + response)
@@ -488,7 +495,7 @@ of the evidence, even when it strongly points in one direction.""",
             response_data["status"] = "consensus_workflow_complete"
             response_data["consensus_complete"] = True
             response_data["complete_consensus"] = {
-                "initial_prompt": self.initial_prompt,
+                "initial_prompt": self.original_proposal if self.original_proposal else self.initial_prompt,
                 "models_consulted": [
                     f"{m['model']}:{m.get('stance', 'neutral')}" for m in self.accumulated_responses
                 ],
@@ -539,7 +546,9 @@ of the evidence, even when it strongly points in one direction.""",
         # Prepare the prompt with any relevant files
         # Use continuation_id=None for blinded consensus - each model should only see
         # original prompt + files, not conversation history or other model responses
-        prompt = self.initial_prompt
+        # CRITICAL: Use the original proposal from step 1, NOT what's in request.step for steps 2+!
+        # Steps 2+ contain summaries/notes that must NEVER be sent to other models
+        prompt = self.original_proposal if self.original_proposal else self.initial_prompt
         if request.relevant_files:
             file_content, _ = self._prepare_file_content_for_prompt(
                 request.relevant_files,
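This hunk is the behavioral core of the commit: every consulted model must receive the step-1 proposal, never the step-2+ notes, even though `request.step` changes on every step. A minimal, self-contained simulation of that rule (the class and method names here are stand-ins, not the real ConsensusTool API):

```python
# Standalone simulation of the blinding rule; names are hypothetical.
class FakeConsensus:
    def __init__(self) -> None:
        self.original_proposal: str | None = None

    def prompt_for_step(self, step_number: int, step_text: str) -> str | None:
        if step_number == 1:
            # Only step 1 defines what the consulted models see.
            self.original_proposal = step_text
        # Steps 2+ carry internal notes and must never become the prompt.
        return self.original_proposal


tool = FakeConsensus()
assert tool.prompt_for_step(1, "Should we build a search component in SwiftUI?") == (
    "Should we build a search component in SwiftUI?"
)
# The note about the first model's answer is NOT forwarded to the next model:
assert tool.prompt_for_step(2, "Notes on the first model's answer") == (
    "Should we build a search component in SwiftUI?"
)
```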
@@ -761,7 +770,8 @@ of the evidence, even when it strongly points in one direction.""",

     def store_initial_issue(self, step_description: str):
         """Store initial prompt for model consultations."""
-        self.initial_prompt = step_description
+        self.original_proposal = step_description
+        self.initial_prompt = step_description  # Keep for backward compatibility

     # Required abstract methods from BaseTool
     def get_request_model(self):
@@ -535,9 +535,9 @@ class TracerTool(WorkflowTool):
         tool_name = self.get_name()
         status_mapping = {
             f"{tool_name}_in_progress": "tracing_in_progress",
-            f"pause_for_{tool_name}": f"pause_for_tracing",
-            f"{tool_name}_required": f"tracing_required",
-            f"{tool_name}_complete": f"tracing_complete",
+            f"pause_for_{tool_name}": "pause_for_tracing",
+            f"{tool_name}_required": "tracing_required",
+            f"{tool_name}_complete": "tracing_complete",
         }

         if response_data["status"] in status_mapping:
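The tracer change only drops the `f` prefix from literals that contain no placeholders; the mapped values are identical at runtime, and the old spelling is the kind of thing linters commonly flag (flake8 reports it as F541, an f-string without placeholders). A quick check of the equivalence:

```python
tool_name = "tracer"

# An f-string with no placeholders is just a plain string.
assert f"tracing_required" == "tracing_required"  # old form, typically flagged as F541
# The dictionary keys still need f-strings because they interpolate tool_name.
assert f"pause_for_{tool_name}" == "pause_for_tracer"
```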