Rebranding, refactoring, renaming, cleanup, updated docs
This commit is contained in:
@@ -4,14 +4,14 @@ Conversation Chain and Threading Validation Test
|
||||
|
||||
This test validates that:
|
||||
1. Multiple tool invocations create proper parent->parent->parent chains
|
||||
2. New conversations can be started independently
|
||||
3. Original conversation chains can be resumed from any point
|
||||
4. History traversal works correctly for all scenarios
|
||||
5. Thread relationships are properly maintained in Redis
|
||||
|
||||
Test Flow:
|
||||
Chain A: chat -> analyze -> debug (3 linked threads)
|
||||
Chain B: chat -> analyze (2 linked threads, independent)
|
||||
Chain A Branch: debug (continue from original chat, creating branch)
|
||||
|
||||
This validates the conversation threading system's ability to:
|
||||
@@ -21,10 +21,8 @@ This validates the conversation threading system's ability to:
|
||||
- Properly traverse parent relationships for history reconstruction
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import subprocess
|
||||
import re
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
import subprocess
|
||||
|
||||
from .base_test import BaseSimulatorTest
|
||||
|
||||
@@ -45,7 +43,7 @@ class ConversationChainValidationTest(BaseSimulatorTest):
|
||||
try:
|
||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
|
||||
|
||||
if result.returncode == 0:
|
||||
return result.stdout
|
||||
else:
|
||||
@@ -55,44 +53,36 @@ class ConversationChainValidationTest(BaseSimulatorTest):
|
||||
self.logger.error(f"Failed to get server logs: {e}")
|
||||
return ""
|
||||
|
||||
def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]:
    """Extract thread creation logs with parent relationships.

    Scans ``logs`` line by line for entries of the form::

        [THREAD] Created new thread <thread_id> with parent <parent_id|None>

    Args:
        logs: Raw server log text, one entry per line.

    Returns:
        One dict per matching line, in log order, with the keys
        ``thread_id``, ``parent_id`` and ``log_line`` (the full matching
        line). ``parent_id`` is ``None`` when the log reports the literal
        text ``None``, so despite the annotation not every value is a str.
    """
    # IDs are lowercase hex plus dashes; the parent may be the literal "None".
    # Compiled once here rather than re-resolved for every log line.
    pattern = re.compile(r"\[THREAD\] Created new thread ([a-f0-9-]+) with parent ([a-f0-9-]+|None)")
    thread_logs = []

    for line in logs.split("\n"):
        # Cheap substring test first so the regex only runs on candidate lines.
        if "[THREAD] Created new thread" not in line:
            continue

        match = pattern.search(line)
        if match is None:
            continue

        thread_id, parent_id = match.groups()
        # Each matching line is recorded exactly once.
        thread_logs.append(
            {
                "thread_id": thread_id,
                "parent_id": None if parent_id == "None" else parent_id,
                "log_line": line,
            }
        )

    return thread_logs
|
||||
|
||||
def extract_history_traversal_logs(self, logs: str) -> list[dict[str, str]]:
    """Extract conversation history traversal logs.

    Scans ``logs`` line by line for entries of the form::

        [THREAD] Retrieved chain of <N> threads for <thread_id>

    Args:
        logs: Raw server log text, one entry per line.

    Returns:
        One dict per matching line, in log order, with the keys
        ``thread_id``, ``chain_length`` and ``log_line`` (the full matching
        line). ``chain_length`` is stored as an ``int``, so despite the
        annotation not every value is a str.
    """
    # Compiled once here rather than re-resolved for every log line.
    pattern = re.compile(r"\[THREAD\] Retrieved chain of (\d+) threads for ([a-f0-9-]+)")
    traversal_logs = []

    for line in logs.split("\n"):
        # Cheap substring test first so the regex only runs on candidate lines.
        if "[THREAD] Retrieved chain of" not in line:
            continue

        match = pattern.search(line)
        if match is None:
            continue

        # Each matching line is recorded exactly once.
        traversal_logs.append(
            {
                "thread_id": match.group(2),
                "chain_length": int(match.group(1)),
                "log_line": line,
            }
        )

    return traversal_logs
|
||||
|
||||
def run_test(self) -> bool:
|
||||
@@ -113,16 +103,16 @@ class TestClass:
|
||||
return "Method in test class"
|
||||
"""
|
||||
test_file_path = self.create_additional_test_file("chain_test.py", test_file_content)
|
||||
|
||||
|
||||
# Track all continuation IDs and their relationships
|
||||
conversation_chains = {}
|
||||
|
||||
|
||||
# === CHAIN A: Build linear conversation chain ===
|
||||
self.logger.info(" 🔗 Chain A: Building linear conversation chain")
|
||||
|
||||
|
||||
# Step A1: Start with chat tool (creates thread_id_1)
|
||||
self.logger.info(" Step A1: Chat tool - start new conversation")
|
||||
|
||||
|
||||
response_a1, continuation_id_a1 = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
@@ -138,11 +128,11 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step A1 completed - thread_id: {continuation_id_a1[:8]}...")
|
||||
conversation_chains['A1'] = continuation_id_a1
|
||||
conversation_chains["A1"] = continuation_id_a1
|
||||
|
||||
# Step A2: Continue with analyze tool (creates thread_id_2 with parent=thread_id_1)
|
||||
self.logger.info(" Step A2: Analyze tool - continue Chain A")
|
||||
|
||||
|
||||
response_a2, continuation_id_a2 = self.call_mcp_tool(
|
||||
"analyze",
|
||||
{
|
||||
@@ -159,11 +149,11 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step A2 completed - thread_id: {continuation_id_a2[:8]}...")
|
||||
conversation_chains['A2'] = continuation_id_a2
|
||||
conversation_chains["A2"] = continuation_id_a2
|
||||
|
||||
# Step A3: Continue with debug tool (creates thread_id_3 with parent=thread_id_2)
|
||||
# Step A3: Continue with debug tool (creates thread_id_3 with parent=thread_id_2)
|
||||
self.logger.info(" Step A3: Debug tool - continue Chain A")
|
||||
|
||||
|
||||
response_a3, continuation_id_a3 = self.call_mcp_tool(
|
||||
"debug",
|
||||
{
|
||||
@@ -180,14 +170,14 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step A3 completed - thread_id: {continuation_id_a3[:8]}...")
|
||||
conversation_chains['A3'] = continuation_id_a3
|
||||
conversation_chains["A3"] = continuation_id_a3
|
||||
|
||||
# === CHAIN B: Start independent conversation ===
|
||||
self.logger.info(" 🔗 Chain B: Starting independent conversation")
|
||||
|
||||
|
||||
# Step B1: Start new chat conversation (creates thread_id_4, no parent)
|
||||
self.logger.info(" Step B1: Chat tool - start NEW independent conversation")
|
||||
|
||||
|
||||
response_b1, continuation_id_b1 = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
@@ -202,11 +192,11 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step B1 completed - thread_id: {continuation_id_b1[:8]}...")
|
||||
conversation_chains['B1'] = continuation_id_b1
|
||||
conversation_chains["B1"] = continuation_id_b1
|
||||
|
||||
# Step B2: Continue the new conversation (creates thread_id_5 with parent=thread_id_4)
|
||||
self.logger.info(" Step B2: Analyze tool - continue Chain B")
|
||||
|
||||
|
||||
response_b2, continuation_id_b2 = self.call_mcp_tool(
|
||||
"analyze",
|
||||
{
|
||||
@@ -222,14 +212,14 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step B2 completed - thread_id: {continuation_id_b2[:8]}...")
|
||||
conversation_chains['B2'] = continuation_id_b2
|
||||
conversation_chains["B2"] = continuation_id_b2
|
||||
|
||||
# === CHAIN A BRANCH: Go back to original conversation ===
|
||||
self.logger.info(" 🔗 Chain A Branch: Resume original conversation from A1")
|
||||
|
||||
|
||||
# Step A1-Branch: Use original continuation_id_a1 to branch (creates thread_id_6 with parent=thread_id_1)
|
||||
self.logger.info(" Step A1-Branch: Debug tool - branch from original Chain A")
|
||||
|
||||
|
||||
response_a1_branch, continuation_id_a1_branch = self.call_mcp_tool(
|
||||
"debug",
|
||||
{
|
||||
@@ -246,73 +236,79 @@ class TestClass:
|
||||
return False
|
||||
|
||||
self.logger.info(f" ✅ Step A1-Branch completed - thread_id: {continuation_id_a1_branch[:8]}...")
|
||||
conversation_chains['A1_Branch'] = continuation_id_a1_branch
|
||||
conversation_chains["A1_Branch"] = continuation_id_a1_branch
|
||||
|
||||
# === ANALYSIS: Validate thread relationships and history traversal ===
|
||||
self.logger.info(" 📊 Analyzing conversation chain structure...")
|
||||
|
||||
|
||||
# Get logs and extract thread relationships
|
||||
logs = self.get_recent_server_logs()
|
||||
thread_creation_logs = self.extract_thread_creation_logs(logs)
|
||||
history_traversal_logs = self.extract_history_traversal_logs(logs)
|
||||
|
||||
|
||||
self.logger.info(f" Found {len(thread_creation_logs)} thread creation logs")
|
||||
self.logger.info(f" Found {len(history_traversal_logs)} history traversal logs")
|
||||
|
||||
|
||||
# Debug: Show what we found
|
||||
if self.verbose:
|
||||
self.logger.debug(" Thread creation logs found:")
|
||||
for log in thread_creation_logs:
|
||||
self.logger.debug(f" {log['thread_id'][:8]}... parent: {log['parent_id'][:8] if log['parent_id'] else 'None'}...")
|
||||
self.logger.debug(
|
||||
f" {log['thread_id'][:8]}... parent: {log['parent_id'][:8] if log['parent_id'] else 'None'}..."
|
||||
)
|
||||
self.logger.debug(" History traversal logs found:")
|
||||
for log in history_traversal_logs:
|
||||
self.logger.debug(f" {log['thread_id'][:8]}... chain length: {log['chain_length']}")
|
||||
|
||||
|
||||
# Build expected thread relationships
|
||||
expected_relationships = []
|
||||
|
||||
|
||||
# Note: A1 and B1 won't appear in thread creation logs because they're new conversations (no parent)
|
||||
# Only continuation threads (A2, A3, B2, A1-Branch) will appear in creation logs
|
||||
|
||||
|
||||
# Find logs for each continuation thread
|
||||
a2_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a2), None)
|
||||
a3_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a3), None)
|
||||
b2_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_b2), None)
|
||||
a1_branch_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a1_branch), None)
|
||||
|
||||
a2_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_a2), None)
|
||||
a3_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_a3), None)
|
||||
b2_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_b2), None)
|
||||
a1_branch_log = next(
|
||||
(log for log in thread_creation_logs if log["thread_id"] == continuation_id_a1_branch), None
|
||||
)
|
||||
|
||||
# A2 should have A1 as parent
|
||||
if a2_log:
|
||||
expected_relationships.append(("A2 has A1 as parent", a2_log['parent_id'] == continuation_id_a1))
|
||||
|
||||
expected_relationships.append(("A2 has A1 as parent", a2_log["parent_id"] == continuation_id_a1))
|
||||
|
||||
# A3 should have A2 as parent
|
||||
if a3_log:
|
||||
expected_relationships.append(("A3 has A2 as parent", a3_log['parent_id'] == continuation_id_a2))
|
||||
|
||||
expected_relationships.append(("A3 has A2 as parent", a3_log["parent_id"] == continuation_id_a2))
|
||||
|
||||
# B2 should have B1 as parent (independent chain)
|
||||
if b2_log:
|
||||
expected_relationships.append(("B2 has B1 as parent", b2_log['parent_id'] == continuation_id_b1))
|
||||
|
||||
expected_relationships.append(("B2 has B1 as parent", b2_log["parent_id"] == continuation_id_b1))
|
||||
|
||||
# A1-Branch should have A1 as parent (branching)
|
||||
if a1_branch_log:
|
||||
expected_relationships.append(("A1-Branch has A1 as parent", a1_branch_log['parent_id'] == continuation_id_a1))
|
||||
|
||||
expected_relationships.append(
|
||||
("A1-Branch has A1 as parent", a1_branch_log["parent_id"] == continuation_id_a1)
|
||||
)
|
||||
|
||||
# Validate history traversal
|
||||
traversal_validations = []
|
||||
|
||||
|
||||
# History traversal logs are only generated when conversation history is built from scratch
|
||||
# (not when history is already embedded in the prompt by server.py)
|
||||
# So we should expect at least 1 traversal log, but not necessarily for every continuation
|
||||
|
||||
|
||||
if len(history_traversal_logs) > 0:
|
||||
# Validate that any traversal logs we find have reasonable chain lengths
|
||||
for log in history_traversal_logs:
|
||||
thread_id = log['thread_id']
|
||||
chain_length = log['chain_length']
|
||||
|
||||
thread_id = log["thread_id"]
|
||||
chain_length = log["chain_length"]
|
||||
|
||||
# Chain length should be at least 2 for any continuation thread
|
||||
# (original thread + continuation thread)
|
||||
is_valid_length = chain_length >= 2
|
||||
|
||||
|
||||
# Try to identify which thread this is for better validation
|
||||
thread_description = "Unknown thread"
|
||||
if thread_id == continuation_id_a2:
|
||||
@@ -327,12 +323,16 @@ class TestClass:
|
||||
elif thread_id == continuation_id_a1_branch:
|
||||
thread_description = "A1-Branch (should be 2-thread chain)"
|
||||
is_valid_length = chain_length == 2
|
||||
|
||||
traversal_validations.append((f"{thread_description[:8]}... has valid chain length", is_valid_length))
|
||||
|
||||
|
||||
traversal_validations.append(
|
||||
(f"{thread_description[:8]}... has valid chain length", is_valid_length)
|
||||
)
|
||||
|
||||
# Also validate we found at least one traversal (shows the system is working)
|
||||
traversal_validations.append(("At least one history traversal occurred", len(history_traversal_logs) >= 1))
|
||||
|
||||
traversal_validations.append(
|
||||
("At least one history traversal occurred", len(history_traversal_logs) >= 1)
|
||||
)
|
||||
|
||||
# === VALIDATION RESULTS ===
|
||||
self.logger.info(" 📊 Thread Relationship Validation:")
|
||||
relationship_passed = 0
|
||||
@@ -341,7 +341,7 @@ class TestClass:
|
||||
self.logger.info(f" {status} {desc}")
|
||||
if passed:
|
||||
relationship_passed += 1
|
||||
|
||||
|
||||
self.logger.info(" 📊 History Traversal Validation:")
|
||||
traversal_passed = 0
|
||||
for desc, passed in traversal_validations:
|
||||
@@ -349,31 +349,35 @@ class TestClass:
|
||||
self.logger.info(f" {status} {desc}")
|
||||
if passed:
|
||||
traversal_passed += 1
|
||||
|
||||
|
||||
# === SUCCESS CRITERIA ===
|
||||
total_relationship_checks = len(expected_relationships)
|
||||
total_traversal_checks = len(traversal_validations)
|
||||
|
||||
self.logger.info(f" 📊 Validation Summary:")
|
||||
|
||||
self.logger.info(" 📊 Validation Summary:")
|
||||
self.logger.info(f" Thread relationships: {relationship_passed}/{total_relationship_checks}")
|
||||
self.logger.info(f" History traversal: {traversal_passed}/{total_traversal_checks}")
|
||||
|
||||
|
||||
# Success requires at least 80% of validations to pass
|
||||
relationship_success = relationship_passed >= (total_relationship_checks * 0.8)
|
||||
|
||||
|
||||
# If no traversal checks were possible, it means no traversal logs were found
|
||||
# This could indicate an issue since we expect at least some history building
|
||||
if total_traversal_checks == 0:
|
||||
self.logger.warning(" No history traversal logs found - this may indicate conversation history is always pre-embedded")
|
||||
self.logger.warning(
|
||||
" No history traversal logs found - this may indicate conversation history is always pre-embedded"
|
||||
)
|
||||
# Still consider it successful since the thread relationships are what matter most
|
||||
traversal_success = True
|
||||
else:
|
||||
traversal_success = traversal_passed >= (total_traversal_checks * 0.8)
|
||||
|
||||
|
||||
overall_success = relationship_success and traversal_success
|
||||
|
||||
self.logger.info(f" 📊 Conversation Chain Structure:")
|
||||
self.logger.info(f" Chain A: {continuation_id_a1[:8]} → {continuation_id_a2[:8]} → {continuation_id_a3[:8]}")
|
||||
|
||||
self.logger.info(" 📊 Conversation Chain Structure:")
|
||||
self.logger.info(
|
||||
f" Chain A: {continuation_id_a1[:8]} → {continuation_id_a2[:8]} → {continuation_id_a3[:8]}"
|
||||
)
|
||||
self.logger.info(f" Chain B: {continuation_id_b1[:8]} → {continuation_id_b2[:8]}")
|
||||
self.logger.info(f" Branch: {continuation_id_a1[:8]} → {continuation_id_a1_branch[:8]}")
|
||||
|
||||
@@ -394,13 +398,13 @@ class TestClass:
|
||||
def main():
    """Run the conversation chain validation test and exit with its status.

    Passing ``--verbose`` or ``-v`` on the command line enables verbose mode.
    The process exits with code 0 when ``run_test()`` returns a truthy value
    and with code 1 otherwise.
    """
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = ConversationChainValidationTest(verbose=verbose)

    success = test.run_test()
    sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
# Script entry point: run the test exactly once when executed directly.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user