Lots of tests with live simulation to validate that conversation continuation / preservation works across requests

This commit is contained in:
Fahad
2025-06-11 17:16:05 +04:00
parent c90ac7561e
commit 780000f9c9
15 changed files with 272 additions and 2296 deletions

View File

@@ -32,16 +32,22 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
(
"thinkdeep",
{
"prompt": "Please use low thinking mode. Think deeply about this Python code and identify potential architectural improvements",
"current_analysis": "Please use low thinking mode. I'm analyzing this Python code to identify potential architectural improvements",
"files": [self.test_files["python"]],
},
),
("analyze", {"files": [self.test_files["python"]], "analysis_type": "architecture"}),
(
"analyze",
{
"files": [self.test_files["python"]],
"question": "Please use low thinking mode. What are the architectural patterns in this code?",
},
),
(
"debug",
{
"files": [self.test_files["python"]],
"issue_description": "The fibonacci function seems slow for large numbers",
"error_description": "Please use low thinking mode. The fibonacci function seems slow for large numbers",
},
),
(
@@ -74,11 +80,17 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
continue_params["continuation_id"] = continuation_id
if tool_name == "thinkdeep":
continue_params["prompt"] = "Please use low thinking mode. Now focus specifically on the recursive fibonacci implementation"
continue_params["current_analysis"] = (
"Please use low thinking mode. Now focus specifically on the recursive fibonacci implementation"
)
elif tool_name == "analyze":
continue_params["analysis_type"] = "performance"
continue_params["question"] = (
"Please use low thinking mode. What are the performance characteristics of this code?"
)
elif tool_name == "debug":
continue_params["issue_description"] = "How can we optimize the fibonacci function?"
continue_params["error_description"] = (
"Please use low thinking mode. How can we optimize the fibonacci function?"
)
elif tool_name == "codereview":
continue_params["context"] = "Focus on the Calculator class implementation"
@@ -89,8 +101,10 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
else:
self.logger.warning(f" ⚠️ {tool_name} tool continuation failed")
self.logger.info(f" ✅ Per-tool file deduplication tests completed: {successful_tests}/{total_tests} tools passed")
self.logger.info(
f" ✅ Per-tool file deduplication tests completed: {successful_tests}/{total_tests} tools passed"
)
# Consider test successful if at least one tool worked
return successful_tests > 0
@@ -98,4 +112,4 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
self.logger.error(f"Per-tool file deduplication test failed: {e}")
return False
finally:
self.cleanup_test_files()
self.cleanup_test_files()