Fixed contamination in consensus https://github.com/BeehiveInnovations/zen-mcp-server/issues/162
Fixed broken test
This commit is contained in:
@@ -154,7 +154,10 @@ def multiply(x, y):
|
|||||||
openai_model_usage = len(openai_model_logs) >= 1 # Should see at least 1 model usage log
|
openai_model_usage = len(openai_model_logs) >= 1 # Should see at least 1 model usage log
|
||||||
openai_responses_received = len(openai_response_logs) >= 1 # Should see at least 1 response
|
openai_responses_received = len(openai_response_logs) >= 1 # Should see at least 1 response
|
||||||
some_chat_calls_to_openai = len(chat_openai_logs) >= 1 # Should see at least 1 chat call
|
some_chat_calls_to_openai = len(chat_openai_logs) >= 1 # Should see at least 1 chat call
|
||||||
some_workflow_calls_to_openai = len(codereview_openai_logs) >= 1 or len([line for line in logs.split("\n") if "openai" in line and "codereview" in line]) > 0 # Should see evidence of workflow tool usage
|
some_workflow_calls_to_openai = (
|
||||||
|
len(codereview_openai_logs) >= 1
|
||||||
|
or len([line for line in logs.split("\n") if "openai" in line and "codereview" in line]) > 0
|
||||||
|
) # Should see evidence of workflow tool usage
|
||||||
|
|
||||||
self.logger.info(f" OpenAI API call logs: {len(openai_api_logs)}")
|
self.logger.info(f" OpenAI API call logs: {len(openai_api_logs)}")
|
||||||
self.logger.info(f" OpenAI model usage logs: {len(openai_model_logs)}")
|
self.logger.info(f" OpenAI model usage logs: {len(openai_model_logs)}")
|
||||||
@@ -179,7 +182,10 @@ def multiply(x, y):
|
|||||||
("OpenAI model usage logged", openai_model_usage),
|
("OpenAI model usage logged", openai_model_usage),
|
||||||
("OpenAI responses received", openai_responses_received),
|
("OpenAI responses received", openai_responses_received),
|
||||||
("Chat tool used OpenAI", some_chat_calls_to_openai),
|
("Chat tool used OpenAI", some_chat_calls_to_openai),
|
||||||
("Workflow tool attempted", some_workflow_calls_to_openai or response3 is not None), # More flexible check
|
(
|
||||||
|
"Workflow tool attempted",
|
||||||
|
some_workflow_calls_to_openai or response3 is not None,
|
||||||
|
), # More flexible check
|
||||||
]
|
]
|
||||||
|
|
||||||
passed_criteria = sum(1 for _, passed in success_criteria if passed)
|
passed_criteria = sum(1 for _, passed in success_criteria if passed)
|
||||||
|
|||||||
@@ -537,11 +537,13 @@ of the evidence, even when it strongly points in one direction.""",
|
|||||||
provider = self.get_model_provider(model_name)
|
provider = self.get_model_provider(model_name)
|
||||||
|
|
||||||
# Prepare the prompt with any relevant files
|
# Prepare the prompt with any relevant files
|
||||||
|
# Use continuation_id=None for blinded consensus - each model should only see
|
||||||
|
# original prompt + files, not conversation history or other model responses
|
||||||
prompt = self.initial_prompt
|
prompt = self.initial_prompt
|
||||||
if request.relevant_files:
|
if request.relevant_files:
|
||||||
file_content, _ = self._prepare_file_content_for_prompt(
|
file_content, _ = self._prepare_file_content_for_prompt(
|
||||||
request.relevant_files,
|
request.relevant_files,
|
||||||
request.continuation_id,
|
None, # Use None instead of request.continuation_id for blinded consensus
|
||||||
"Context files",
|
"Context files",
|
||||||
)
|
)
|
||||||
if file_content:
|
if file_content:
|
||||||
|
|||||||
Reference in New Issue
Block a user