WIP major refactor and features
This commit is contained in:
@@ -37,6 +37,7 @@ class BasicConversationTest(BaseSimulatorTest):
|
||||
{
|
||||
"prompt": "Please use low thinking mode. Analyze this Python code and explain what it does",
|
||||
"files": [self.test_files["python"]],
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -54,6 +55,7 @@ class BasicConversationTest(BaseSimulatorTest):
|
||||
"prompt": "Please use low thinking mode. Now focus on the Calculator class specifically. Are there any improvements you'd suggest?",
|
||||
"files": [self.test_files["python"]], # Same file - should be deduplicated
|
||||
"continuation_id": continuation_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -69,6 +71,7 @@ class BasicConversationTest(BaseSimulatorTest):
|
||||
"prompt": "Please use low thinking mode. Now also analyze this configuration file and see how it might relate to the Python code",
|
||||
"files": [self.test_files["python"], self.test_files["config"]],
|
||||
"continuation_id": continuation_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ DATABASE_CONFIG = {
|
||||
{
|
||||
"path": os.getcwd(),
|
||||
"files": [validation_file],
|
||||
"original_request": "Test for content duplication in precommit tool",
|
||||
"prompt": "Test for content duplication in precommit tool",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -116,16 +116,18 @@ DATABASE_CONFIG = {
|
||||
{
|
||||
"prompt": "Please use low thinking mode. Analyze this config file",
|
||||
"files": [validation_file],
|
||||
"model": "flash",
|
||||
}, # Using absolute path
|
||||
),
|
||||
(
|
||||
"codereview",
|
||||
{
|
||||
"files": [validation_file],
|
||||
"context": "Please use low thinking mode. Review this configuration",
|
||||
"prompt": "Please use low thinking mode. Review this configuration",
|
||||
"model": "flash",
|
||||
}, # Using absolute path
|
||||
),
|
||||
("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path
|
||||
("analyze", {"files": [validation_file], "analysis_type": "code_quality", "model": "flash"}), # Using absolute path
|
||||
]
|
||||
|
||||
for tool_name, params in tools_to_test:
|
||||
@@ -163,6 +165,7 @@ DATABASE_CONFIG = {
|
||||
"prompt": "Please use low thinking mode. Continue analyzing this configuration file",
|
||||
"files": [validation_file], # Same file should be deduplicated
|
||||
"continuation_id": thread_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -91,6 +91,7 @@ def hash_pwd(pwd):
|
||||
"prompt": "Please give me a quick one line reply. I have an authentication module that needs review. Can you help me understand potential issues?",
|
||||
"files": [auth_file],
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
|
||||
@@ -106,8 +107,9 @@ def hash_pwd(pwd):
|
||||
self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)")
|
||||
analyze_params = {
|
||||
"files": [auth_file],
|
||||
"question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
|
||||
"prompt": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
|
||||
@@ -127,6 +129,7 @@ def hash_pwd(pwd):
|
||||
"prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?",
|
||||
"files": [auth_file, config_file_path], # Old + new file
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response3, _ = self.call_mcp_tool("chat", chat_continue_params)
|
||||
@@ -141,8 +144,9 @@ def hash_pwd(pwd):
|
||||
self.logger.info(" Step 4: debug tool - Identify specific problems")
|
||||
debug_params = {
|
||||
"files": [auth_file, config_file_path],
|
||||
"error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
|
||||
"prompt": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
|
||||
@@ -161,8 +165,9 @@ def hash_pwd(pwd):
|
||||
debug_continue_params = {
|
||||
"continuation_id": continuation_id4,
|
||||
"files": [auth_file, config_file_path],
|
||||
"error_description": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
|
||||
"prompt": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response5, _ = self.call_mcp_tool("debug", debug_continue_params)
|
||||
@@ -174,8 +179,9 @@ def hash_pwd(pwd):
|
||||
self.logger.info(" Step 6: codereview tool - Comprehensive code review")
|
||||
codereview_params = {
|
||||
"files": [auth_file, config_file_path],
|
||||
"context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
|
||||
"prompt": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
|
||||
@@ -207,7 +213,7 @@ def secure_login(user, pwd):
|
||||
precommit_params = {
|
||||
"path": self.test_dir,
|
||||
"files": [auth_file, config_file_path, improved_file],
|
||||
"original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
|
||||
"prompt": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
|
||||
"thinking_mode": "low",
|
||||
}
|
||||
|
||||
|
||||
@@ -67,6 +67,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
{
|
||||
"prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
|
||||
"files": [self.test_files["python"]],
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -81,6 +82,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
"prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
|
||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||
"continuation_id": chat_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -93,8 +95,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
"codereview",
|
||||
{
|
||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||
"context": "Building on our previous analysis, provide a comprehensive code review",
|
||||
"prompt": "Building on our previous analysis, provide a comprehensive code review",
|
||||
"continuation_id": chat_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -116,7 +119,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
|
||||
# Start with analyze
|
||||
analyze_response, analyze_id = self.call_mcp_tool(
|
||||
"analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality"}
|
||||
"analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality", "model": "flash"}
|
||||
)
|
||||
|
||||
if not analyze_response or not analyze_id:
|
||||
@@ -128,8 +131,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
"debug",
|
||||
{
|
||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||
"issue_description": "Based on our analysis, help debug the performance issue in fibonacci",
|
||||
"prompt": "Based on our analysis, help debug the performance issue in fibonacci",
|
||||
"continuation_id": analyze_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -144,6 +148,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
"prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
|
||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||
"continuation_id": analyze_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -169,6 +174,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
{
|
||||
"prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
|
||||
"files": [self.test_files["python"], self.test_files["config"]],
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -181,8 +187,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
||||
"codereview",
|
||||
{
|
||||
"files": [self.test_files["python"], self.test_files["config"]], # Same files
|
||||
"context": "Review both files in the context of our previous discussion",
|
||||
"prompt": "Review both files in the context of our previous discussion",
|
||||
"continuation_id": multi_id,
|
||||
"model": "flash",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -100,8 +100,9 @@ def divide(x, y):
|
||||
precommit_params = {
|
||||
"path": self.test_dir, # Required path parameter
|
||||
"files": [dummy_file_path],
|
||||
"original_request": "Please give me a quick one line reply. Review this code for commit readiness",
|
||||
"prompt": "Please give me a quick one line reply. Review this code for commit readiness",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
|
||||
@@ -124,8 +125,9 @@ def divide(x, y):
|
||||
self.logger.info(" Step 2: codereview tool with same file (fresh conversation)")
|
||||
codereview_params = {
|
||||
"files": [dummy_file_path],
|
||||
"context": "Please give me a quick one line reply. General code review for quality and best practices",
|
||||
"prompt": "Please give me a quick one line reply. General code review for quality and best practices",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response2, _ = self.call_mcp_tool("codereview", codereview_params)
|
||||
@@ -150,8 +152,9 @@ def subtract(a, b):
|
||||
"continuation_id": continuation_id,
|
||||
"path": self.test_dir, # Required path parameter
|
||||
"files": [dummy_file_path, new_file_path], # Old + new file
|
||||
"original_request": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
|
||||
"prompt": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
|
||||
"thinking_mode": "low",
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response3, _ = self.call_mcp_tool("precommit", continue_params)
|
||||
|
||||
Reference in New Issue
Block a user